-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathghrepo.py
187 lines (161 loc) · 6.63 KB
/
ghrepo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
"""Module containing GitHubRepo data class and functions to perform api calls"""
import re
from base64 import b64decode
from dataclasses import dataclass
from datetime import datetime
from typing import Literal
import httpx
import textstat # type: ignore
from unmarkdown import unmark
Severity = Literal["ok", "low", "high"]
Criteria = tuple[str, Severity]
@dataclass
class GitHubRepo:
"""Class that contains all relevant info about a GitHub repository"""
name: str
owner: str
url: str
created: datetime
description: str | None
topics: list[str] | None
license: str | None
updated: datetime
readme: str | None
readability: float | None
cff: str | None
@property
def description_check(self) -> Criteria:
"""Check that description is available"""
if self.description:
return "Description Available", "ok"
else:
return "No description", "high"
@property
def topics_check(self) -> Criteria:
"""Check that enough topics are available"""
if not self.topics:
return "No topics", "high"
if len(self.topics) < 3:
return "Less than 3 topics", "low"
topics_txt = ", ".join(self.topics)
return topics_txt, "ok"
@property
def license_check(self) -> Criteria:
"""Check that registered license is available."""
if not self.license:
return "No license", "high"
if self.license == "Other":
return self.license, "low"
return self.license, "ok"
@property
def last_update_check(self) -> Criteria:
"""Check that the repo was updated in the past year or two."""
days = (datetime.now(tz=self.updated.tzinfo) - self.updated).days
date_txt = self.updated.date().isoformat()
if days < 365:
return date_txt, "ok"
if days < 730:
return date_txt, "low"
return date_txt, "high"
@property
def cff_check(self) -> Criteria:
"""Check that the repo has a cff file."""
if self.cff:
return "CFF available", "ok"
return "No CFF file", "high"
@property
def readme_check(self) -> Criteria:
"""Check that the readme is available"""
if self.readme:
return "Readme available", "ok"
return "No readme", "high"
@property
def readability_check(self) -> Criteria:
"""Check that the readme is legible"""
if not self.readme:
return "No readme", "high"
if self.readability > 30: # type: ignore
return str(self.readability), "ok"
if self.readability > 20: # type: ignore
return str(self.readability), "low"
return str(self.readability), "high"
def readme_section_check(self, section: str) -> Criteria:
"""Check if the section occurs in the readme."""
if not self.readme:
has_section = False
else:
pattern = re.compile(f"\\# {section}", re.IGNORECASE)
has_section = pattern.search(self.readme) is not None
if has_section:
return f"{section} available", "ok"
return f"{section} not available", "high"
@classmethod
async def from_api_response(cls, response: dict, token: str | None = None):
"""Construct a repository data class from a GitHub api response dictionary."""
readme_txt = await get_readme(response["full_name"], token=token)
cff_txt = await get_cff(response["full_name"], token=token)
return cls(
name=response["name"],
owner=response["owner"]["login"],
url=response["html_url"],
created=datetime.fromisoformat(response["created_at"]),
description=response["description"],
topics=response["topics"] if "topics" in response else None,
license=response["license"]["name"] if response["license"] else None,
updated=datetime.fromisoformat(response["updated_at"]),
cff=cff_txt,
readme=readme_txt,
readability=compute_readability(readme_txt) if readme_txt is not None else None,
)
async def get_readme(full_name: str, token: str | None = None) -> str | None:
    """Get readme for a repository.

    Args:
        full_name: Repository identifier inserted into the api url
            (``repos/{full_name}/readme``).
        token: Optional token sent as a ``Bearer`` authorization header.

    Returns:
        The decoded readme text, or ``None`` when the request does not answer
        with status 200 or the payload carries no ``content`` field.
    """
    api_url = f"https://api.github.com/repos/{full_name}/readme"
    head = {"Authorization": f"Bearer {token}"} if token else {}
    async with httpx.AsyncClient() as client:
        readme_response = await client.get(api_url, headers=head)
    if readme_response.status_code != 200:
        return None
    readme_b64 = readme_response.json().get("content")
    if readme_b64 is None:
        # Without this guard ``b64decode(None)`` raises TypeError.
        return None
    # Undecodable bytes are replaced rather than raising, so one oddly
    # encoded readme cannot abort the caller.
    return b64decode(readme_b64).decode(errors="replace")
async def get_cff(full_name: str, token: str | None = None) -> str | None:
    """Get the citation file (cff) for a repository.

    ``CITATION.cff`` is tried first, then ``citation.cff``; the first
    candidate answering with status 200 and a ``content`` field wins.

    Args:
        full_name: Repository identifier inserted into the api url
            (``repos/{full_name}/contents/...``).
        token: Optional token sent as a ``Bearer`` authorization header.

    Returns:
        The decoded file content, or ``None`` when no candidate is found.
    """
    api_urls = [
        f"https://api.github.com/repos/{full_name}/contents/CITATION.cff",
        f"https://api.github.com/repos/{full_name}/contents/citation.cff",
    ]
    head = {"Authorization": f"Bearer {token}"} if token else {}
    async with httpx.AsyncClient() as client:
        for api_url in api_urls:
            resp = await client.get(api_url, headers=head)
            if resp.status_code != 200:
                continue
            cff_b64 = resp.json().get("content")
            if cff_b64 is None:
                # Without this guard ``b64decode(None)`` raises TypeError;
                # move on to the next candidate instead.
                continue
            # Undecodable bytes are replaced rather than raising.
            return b64decode(cff_b64).decode(errors="replace")
    return None
def compute_readability(readme_txt: str):
    """Return the Flesch reading-ease score of a markdown readme.

    The markdown markup is stripped with ``unmark`` first so that only the
    plain text is scored.
    """
    return textstat.textstat.flesch_reading_ease(unmark(readme_txt))
async def get_org_repos(org: str, token: str | None = None) -> list[GitHubRepo]:
    """Get all the public repos for an organisation.

    Pages of 100 repositories are requested until an empty page comes back;
    every entry is turned into a ``GitHubRepo`` via
    ``GitHubRepo.from_api_response``.

    Args:
        org: Organisation name inserted into the api url (``orgs/{org}/repos``).
        token: Optional token sent as a ``Bearer`` authorization header and
            forwarded to ``GitHubRepo.from_api_response``.

    Returns:
        The repositories of all pages, in the order they were returned.

    Raises:
        httpx.HTTPStatusError: Raised by ``raise_for_status`` when a page
            request is not answered successfully.
    """
    head = {"Authorization": f"Bearer {token}"} if token else {}
    repos: list[GitHubRepo] = []
    async with httpx.AsyncClient() as client:
        page = 1
        while True:
            url = f"https://api.github.com/orgs/{org}/repos?page={page}&per_page=100"
            response = await client.get(url, headers=head)
            # Check the status unconditionally. Previously a response that was
            # neither 200 nor rejected by ``raise_for_status`` left ``page``
            # untouched and the same url was requested forever.
            response.raise_for_status()
            page_repos = response.json()
            if not page_repos:
                break
            for repo in page_repos:
                repos.append(await GitHubRepo.from_api_response(repo, token))
            page += 1
    return repos
def _parse_token_expiry(raw: str) -> datetime:
    """Parse the value of the token-expiration header into a datetime."""
    try:
        return datetime.fromisoformat(raw)
    except ValueError:
        pass
    # NOTE(review): GitHub appears to send values such as
    # "2024-01-31 12:00:00 UTC" or "2024-01-31 12:00:00 +0100", which
    # ``fromisoformat`` rejects - confirm against a live response.
    text = raw.strip()
    if text.upper().endswith(" UTC"):
        # ``%Z`` would yield a naive datetime; a numeric offset keeps it aware.
        text = f"{text[:-4]} +0000"
    return datetime.strptime(text, "%Y-%m-%d %H:%M:%S %z")


def find_token_expiration(token: str):
    """Return the time left on the GH personal access token.

    Args:
        token: Token sent as a ``Bearer`` authorization header.

    Returns:
        A ``timedelta`` between the expiry announced in the
        ``GitHub-Authentication-Token-Expiration`` header and now.

    Raises:
        httpx.HTTPStatusError: Raised by ``raise_for_status`` when the
            request is not answered successfully.
        KeyError: If the response carries no expiration header.
        ValueError: If the header value cannot be parsed.
    """
    res = httpx.get("https://api.github.com/", headers={"Authorization": f"Bearer {token}"})
    res.raise_for_status()
    expiry = _parse_token_expiry(res.headers["GitHub-Authentication-Token-Expiration"])
    # Compare in the expiry's own timezone so aware/naive values never mix.
    return expiry - datetime.now(tz=expiry.tzinfo)