-
Notifications
You must be signed in to change notification settings - Fork 22
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add basic support for LeetCode #114
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,264 @@ | ||||||
""" | ||||||
the module for LeetCode (https://leetcode.com/) | ||||||
""" | ||||||
|
||||||
import time | ||||||
import urllib.parse | ||||||
from logging import getLogger | ||||||
from typing import * | ||||||
|
||||||
import bs4 | ||||||
import requests | ||||||
|
||||||
import onlinejudge._implementation.utils as utils | ||||||
import onlinejudge.type | ||||||
from onlinejudge.type import * | ||||||
|
||||||
logger = getLogger(__name__) | ||||||
|
||||||
|
||||||
class LeetCodeService(onlinejudge.type.Service):
    """
    The service object for LeetCode (https://leetcode.com/).
    """

    # Domains under which the CSRF cookie may be stored. `http.cookiejar`
    # prepends a dot when the server sends an explicit `Domain=` attribute,
    # so both spellings have to be accepted.
    _COOKIE_DOMAINS = ('leetcode.com', '.leetcode.com')

    def get_url(self) -> str:
        """
        :return: the URL of the top page, with a trailing slash
        """
        return 'https://leetcode.com/'

    def get_name(self) -> str:
        """
        :return: the human-readable name of the service
        """
        return 'LeetCode'

    def _set_request_header(self, session: Optional[requests.Session] = None) -> requests.Session:
        """
        Prepare a session for talking to LeetCode's JSON endpoints.

        NOTE: this mutates ``session.headers`` in place. If the same session
        is later used for another service, the LeetCode-specific headers
        (including the CSRF token) are sent there too. Passing the headers
        per request instead would avoid that.

        :param session: the session to prepare; the default session of
            :py:mod:`utils` is used when omitted
        :return: the prepared session
        """
        session = session or utils.get_default_session()

        service_url = self.get_url()
        session.headers.update({
            'Origin': service_url,
            'Referer': service_url,
            'Content-type': 'application/json',
        })

        # get csrf token from cookies and set it to header as well
        for cookie in session.cookies:
            if cookie.name != 'csrftoken' or cookie.domain not in self._COOKIE_DOMAINS:
                continue
            if cookie.value is not None:
                session.headers.update({
                    'X-CSRFToken': cookie.value,
                })
                break

        return session

    @classmethod
    def from_url(cls, url: str) -> Optional['LeetCodeService']:
        """
        :param url: a URL such as ``https://leetcode.com/``
        :return: a service object, or :py:obj:`None` if the URL does not
            point to ``leetcode.com``
        """
        # example: https://leetcode.com/
        result = urllib.parse.urlparse(url)
        if result.scheme not in ('', 'http', 'https'):
            return None
        if result.netloc != 'leetcode.com':
            return None
        return cls()

    def get_url_of_login_page(self) -> str:
        """
        :return: the URL of the login page
        """
        return f'{self.get_url()}accounts/login/'

    def is_logged_in(self, *, session: Optional[requests.Session] = None) -> bool:
        """
        Ask the GraphQL endpoint whether the session is signed in.

        :param session: the session to check; the default session is used
            when omitted
        :return: the ``isSignedIn`` flag reported by the server
        """
        session = self._set_request_header(session)
        json_body = {
            'operationName': 'globalData',
            'query': '\n'.join([
                'query globalData {',
                ' userStatus {',
                ' isSignedIn',
                ' }',
                '}',
            ]),
        }
        resp = utils.request('POST', 'https://leetcode.com/graphql', session=session, json=json_body)
        json_resp = resp.json()
        return json_resp['data']['userStatus']['isSignedIn']
|
||||||
|
||||||
class LeetCodeProblem(onlinejudge.type.Problem):
    """
    A problem on LeetCode, identified by the slug that appears in its URL.

    :ivar title_slug: :py:class:`str`
    """
    def __init__(self, *, title_slug: str):
        self.title_slug = title_slug

    def _set_request_header(self, session: Optional[requests.Session] = None) -> requests.Session:
        """
        Prepare a session for requests about this problem.

        NOTE: like the method of the same name on the service, this mutates
        ``session.headers`` in place, so the headers stay on the session for
        any later request made with it.

        :param session: the session to prepare; the default session is used
            when omitted
        :return: the prepared session
        """
        service = self.get_service()
        session = service._set_request_header(session)
        service_url = service.get_url()
        session.headers.update({
            'Referer': f'{service_url}problems/{self.title_slug}/',
        })
        return session

    def _query_question(self, field_lines: List[str], *, session: requests.Session) -> Dict[str, Any]:
        """
        Run the ``getQuestionDetail`` GraphQL query for this problem.

        :param field_lines: the lines of the selection set to request inside
            ``question { ... }``
        :param session: a session already prepared by :py:meth:`_set_request_header`
        :return: the ``data.question`` object of the JSON response
        """
        json_body = {
            'operationName': 'getQuestionDetail',
            'query': '\n'.join([
                'query getQuestionDetail($titleSlug: String!) {',
                ' question(titleSlug: $titleSlug) {',
                *field_lines,
                ' }',
                '}',
            ]),
            'variables': {
                'titleSlug': self.title_slug
            },
        }
        resp = utils.request('POST', 'https://leetcode.com/graphql', session=session, json=json_body)
        json_resp = resp.json()
        return json_resp['data']['question']

    @staticmethod
    def _is_label(node: Any, label: str) -> bool:
        """
        Check whether a node is a ``<strong>`` tag whose only child is a
        piece of text containing ``label`` (compared case-insensitively).
        """
        if getattr(node, 'name', None) != 'strong':
            return False
        if len(node.contents) != 1:
            return False
        text = node.contents[0]
        # A nested tag (e.g. <strong><em>..</em></strong>) has no usable
        # `.lower()`; treat it as "not a label" instead of crashing.
        return isinstance(text, str) and label in text.lower()

    @staticmethod
    def _text_after_label(children: List[Any], start: int, label: str) -> Tuple[int, str]:
        """
        Find the first label at or after ``start`` and read the node after it.

        :return: a pair of the index of the node following the label and its
            stripped text; the text is empty when there is no such node or
            it is not a text node
        """
        idx = start
        while idx < len(children) and not LeetCodeProblem._is_label(children[idx], label):
            idx += 1
        idx += 1
        if idx < len(children) and isinstance(children[idx], str):
            return idx, children[idx].strip()
        return idx, ''

    # TODO: enable to get premium only questions as well
    def download_sample_cases(self, *, session: Optional[requests.Session] = None) -> List[TestCase]:
        """
        Extract the sample cases from the ``<pre>`` blocks of the statement.

        Each ``<pre>`` is expected to contain ``<strong>Input</strong>`` and
        ``<strong>Output</strong>`` labels, each followed by the data as
        text. Statements are not uniform (for example, some have a second
        "Output" label that is really an explanation), so a block that does
        not match is reported with a warning and skipped rather than
        treated as an error.

        :param session: the session to use; the default session is used
            when omitted
        :return: the sample cases that could be parsed; empty when the
            statement is not available
        """
        session = self._set_request_header(session)
        question = self._query_question([' content'], session=session)
        content_html = question['content']
        if content_html is None:
            logger.warning("This problem seems to be locked: need premium?")
            return []

        soup = bs4.BeautifulSoup(content_html, utils.html_parser)
        test_cases = []  # type: List[TestCase]

        for num, pre in enumerate(soup.find_all('pre')):
            children = pre.contents

            # The search for the output label continues from where the input
            # data was found, so an "Output" before "Input" is not picked up.
            idx, input_data = self._text_after_label(children, 0, 'input')
            _, output_data = self._text_after_label(children, idx, 'output')

            if not (input_data and output_data):
                logger.warning('failed to parse the sample case in <pre> block #%d: skipped', num + 1)
                continue

            test_cases.append(TestCase(
                f'Example {num + 1}',
                'Input',
                input_data.encode(),
                'Output',
                output_data.encode(),
            ))
        return test_cases

    def get_available_languages(self, *, session: Optional[requests.Session] = None) -> List[Language]:
        """
        List the languages for which the problem provides a code snippet.

        :param session: the session to use; the default session is used
            when omitted
        :return: the languages, using ``langSlug`` as the id and ``lang`` as
            the name; empty when the server returns no snippets
        """
        session = self._set_request_header(session)
        question = self._query_question([
            ' codeSnippets {',
            ' lang',
            ' langSlug',
            ' }',
        ], session=session)
        code_snippets = question['codeSnippets']
        if code_snippets is None:
            # presumably the same locked-problem case as a missing statement
            # in download_sample_cases -- TODO confirm
            logger.warning("This problem seems to be locked: need premium?")
            return []
        return [Language(snippet['langSlug'], snippet['lang']) for snippet in code_snippets]

    def submit_code(self, code: bytes, language_id: LanguageId, *, filename: Optional[str] = None, session: Optional[requests.Session] = None) -> onlinejudge.type.Submission:
        """
        Submit the code and wait until the judge reports a result.

        :param code: the source code; it is decoded with the default codec
        :param language_id: the ``langSlug`` of the language
        :param filename: unused
        :param session: the session to use; the default session is used
            when omitted
        :raises NotLoggedInError:
        :raises SubmissionError:
        """

        if not self.get_service().is_logged_in(session=session):
            logger.error('not logged in or session expired')
            raise NotLoggedInError

        session = self._set_request_header(session)

        # get questionId
        question_id = self._query_question([' questionId'], session=session)['questionId']

        # submit code
        json_body = {
            'lang': language_id,
            'question_id': question_id,
            'typed_code': code.decode(),
        }
        submit_url = f'https://leetcode.com/problems/{self.title_slug}/submit/'
        # NOTE(review): the review discussion leaned towards not retrying on
        # 429 at all; the retry is kept here so that behaviour is unchanged.
        max_retries = 5
        retries_left = max_retries
        while True:
            try:
                resp = utils.request('POST', submit_url, session=session, json=json_body)
            except requests.exceptions.HTTPError as e:
                # Only "429 Too Many Requests" is worth retrying.
                if e.response is None or e.response.status_code != 429:
                    raise SubmissionError from e
                if retries_left == 0:
                    logger.error('Failed to submit your code after %d retries: abort', max_retries)
                    raise SubmissionError from e
                retries_left -= 1
                logger.warning('LeetCode\'s submission rate limit exceeded: try in 3 seconds')
                time.sleep(3)
                continue
            break
        json_resp = resp.json()
        submission_id = json_resp['submission_id']

        # polling to the result
        while True:
            resp = utils.request('GET', f'https://leetcode.com/submissions/detail/{submission_id}/check/', session=session)
            json_resp = resp.json()
            if json_resp['state'] == 'SUCCESS':
                break
            logger.warning('Waiting for the result of your submission(id: %s)', submission_id)
            # The interval was chosen to roughly match how often the LeetCode
            # web page itself polls this endpoint after a submission, so it
            # should not put more load on the server than a browser does.
            time.sleep(1 / 3)

        result_url = f'https://leetcode.com/submissions/detail/{submission_id}/'
        logger.info('success: result: %s', result_url)
        return utils.DummySubmission(result_url, problem=self)

    def get_url(self) -> str:
        """
        :return: the URL of the problem page, with a trailing slash
        """
        return f'https://leetcode.com/problems/{self.title_slug}/'

    @classmethod
    def from_url(cls, url: str) -> Optional['LeetCodeProblem']:
        """
        :param url: a URL such as ``https://leetcode.com/problems/two-sum/``
        :return: a problem object, or :py:obj:`None` if the URL is not a
            problem page on ``leetcode.com``
        """
        # example: https://leetcode.com/problems/two-sum/
        result = urllib.parse.urlparse(url)
        if result.scheme not in ('', 'http', 'https'):
            return None
        if result.netloc != 'leetcode.com':
            return None
        parts = utils.normpath(result.path).split('/')[1:]
        if len(parts) < 2 or parts[0] != 'problems':
            return None
        return cls(title_slug=parts[1])

    def get_service(self) -> LeetCodeService:
        """
        :return: the service this problem belongs to
        """
        return LeetCodeService()
|
||||||
|
||||||
# Register the classes of this module so that the dispatcher can find them.
onlinejudge.dispatch.services.append(LeetCodeService)
onlinejudge.dispatch.problems.append(LeetCodeProblem)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
optional: session は連続して利用するのが期待されるものなので、`session.headers` を書き換えるのはあまりきれいではありません。たとえば同じ session を使って続けて AtCoder にリクエストを送ると、AtCoder 社のサーバに LeetCode 向けの CSRF token などが送られてしまい、セキュリティ的に不適切です。`utils.request(..., headers={'X-CSRFToken': ..., ...}, session=session)` のようにすれば、その headers がそのリクエストにだけ追加されて送られます。実際のところこの挙動が問題になることはないだろうと思うので、ひとまずコメントに説明を書いておくだけでもかまいません。
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
これ気が回せていませんでした。そのとおりだと思います。
リクエスト時にheadersを渡す方向で考え直してみます。