Compare commits
11 Commits
97cad81c35
...
master
Author | SHA1 | Date | |
---|---|---|---|
d0ae823e26 | |||
d3b3e7abbc | |||
1ebef0083d | |||
92e16cc615
|
|||
3d51e2de8a
|
|||
c6600852ec
|
|||
a1c5e8ab1f
|
|||
1cd9fc859f | |||
9c85c34037
|
|||
|
8d8c8b0625 | ||
|
aaef99f731 |
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
Codeforces scraper provides a gentle way to interact with Codeforces
|
Codeforces scraper provides a gentle way to interact with Codeforces
|
||||||
|
|
||||||
```
|
```python3
|
||||||
from codeforces_scraper import Scraper
|
from codeforces_scraper import Scraper
|
||||||
from codeforces_scraper.languages import some_compiler_by_ext
|
from codeforces_scraper.languages import some_compiler_by_ext
|
||||||
from getpass import getpass
|
from getpass import getpass
|
||||||
|
@@ -71,8 +71,8 @@ class APIModel(BaseModel):
|
|||||||
|
|
||||||
class JudgeProtocol(APIModel):
    """Judge protocol record, used as the ``judge_protocol`` field of a hack.

    ``protocol`` and ``verdict`` carry an explicit ``None`` default so that
    a payload omitting them still validates: an ``Optional[...]``
    annotation on its own only allows ``None`` as a value, it does not make
    the field optional.
    """

    manual: bool
    protocol: Optional[str] = None
    verdict: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class BlogEntry(APIModel):
|
class BlogEntry(APIModel):
|
||||||
@@ -81,7 +81,7 @@ class BlogEntry(APIModel):
|
|||||||
creation_time_seconds: int
|
creation_time_seconds: int
|
||||||
author_handle: str
|
author_handle: str
|
||||||
title: str
|
title: str
|
||||||
content: Optional[str]
|
content: Optional[str] = None
|
||||||
locale: str
|
locale: str
|
||||||
modification_time_seconds: int
|
modification_time_seconds: int
|
||||||
allow_view_history: bool
|
allow_view_history: bool
|
||||||
@@ -95,7 +95,7 @@ class Comment(APIModel):
|
|||||||
commentator_handle: str
|
commentator_handle: str
|
||||||
locale: str
|
locale: str
|
||||||
text: str
|
text: str
|
||||||
parent_comment_id: Optional[int]
|
parent_comment_id: Optional[int] = None
|
||||||
rating: int
|
rating: int
|
||||||
|
|
||||||
|
|
||||||
@@ -126,26 +126,26 @@ class Member(APIModel):
|
|||||||
|
|
||||||
|
|
||||||
class Problem(APIModel):
    """Problem record parsed from API responses.

    Every ``Optional`` field defaults to ``None`` so that a payload which
    leaves it out still validates; the remaining fields are required.
    """

    contest_id: Optional[int] = None
    problem_set_name: Optional[str] = None
    index: str
    name: str
    type: str
    points: Optional[float] = None
    rating: Optional[int] = None
    tags: List[str]
|
||||||
|
|
||||||
|
|
||||||
class User(APIModel):
|
class User(APIModel):
|
||||||
handle: str
|
handle: str
|
||||||
email: Optional[str]
|
email: Optional[str] = None
|
||||||
vk_id: Optional[str]
|
vk_id: Optional[str] = None
|
||||||
open_id: Optional[str]
|
open_id: Optional[str] = None
|
||||||
first_name: Optional[str]
|
first_name: Optional[str] = None
|
||||||
last_name: Optional[str]
|
last_name: Optional[str] = None
|
||||||
country: Optional[str]
|
country: Optional[str] = None
|
||||||
city: Optional[str]
|
city: Optional[str] = None
|
||||||
organization: Optional[str]
|
organization: Optional[str] = None
|
||||||
contribution: int
|
contribution: int
|
||||||
rank: str
|
rank: str
|
||||||
rating: int
|
rating: int
|
||||||
@@ -162,11 +162,11 @@ class Party(APIModel):
|
|||||||
contest_id: int
|
contest_id: int
|
||||||
members: List[Member]
|
members: List[Member]
|
||||||
participant_type: str
|
participant_type: str
|
||||||
team_id: Optional[int]
|
team_id: Optional[int] = None
|
||||||
team_name: Optional[str]
|
team_name: Optional[str] = None
|
||||||
ghost: bool
|
ghost: bool
|
||||||
room: Optional[int]
|
room: Optional[int] = None
|
||||||
start_time_seconds: Optional[int]
|
start_time_seconds: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
class Submission(APIModel):
|
class Submission(APIModel):
|
||||||
@@ -177,12 +177,12 @@ class Submission(APIModel):
|
|||||||
problem: Problem
|
problem: Problem
|
||||||
author: Party
|
author: Party
|
||||||
programming_language: str
|
programming_language: str
|
||||||
verdict: Optional[Verdict]
|
verdict: Optional[Verdict] = None
|
||||||
testset: str
|
testset: str
|
||||||
passed_test_count: int
|
passed_test_count: int
|
||||||
time_consumed_millis: int
|
time_consumed_millis: int
|
||||||
memory_consumed_bytes: int
|
memory_consumed_bytes: int
|
||||||
points: Optional[float]
|
points: Optional[float] = None
|
||||||
|
|
||||||
|
|
||||||
class Contest(APIModel):
|
class Contest(APIModel):
|
||||||
@@ -192,17 +192,17 @@ class Contest(APIModel):
|
|||||||
phase: ContestPhase
|
phase: ContestPhase
|
||||||
frozen: bool
|
frozen: bool
|
||||||
duration_seconds: bool
|
duration_seconds: bool
|
||||||
start_time_seconds: Optional[int]
|
start_time_seconds: Optional[int] = None
|
||||||
relative_time_seconds: Optional[int]
|
relative_time_seconds: Optional[int] = None
|
||||||
prepared_by: Optional[str]
|
prepared_by: Optional[str] = None
|
||||||
website_url: Optional[str]
|
website_url: Optional[str] = None
|
||||||
description: Optional[str]
|
description: Optional[str] = None
|
||||||
difficulty: Optional[int]
|
difficulty: Optional[int] = None
|
||||||
kind: Optional[str]
|
kind: Optional[str] = None
|
||||||
icpc_region: Optional[str]
|
icpc_region: Optional[str] = None
|
||||||
country: Optional[str]
|
country: Optional[str] = None
|
||||||
city: Optional[str]
|
city: Optional[str] = None
|
||||||
season: Optional[str]
|
season: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class Hack(APIModel):
|
class Hack(APIModel):
|
||||||
@@ -211,8 +211,8 @@ class Hack(APIModel):
|
|||||||
hacker: Party
|
hacker: Party
|
||||||
defender: Party
|
defender: Party
|
||||||
problem: Problem
|
problem: Problem
|
||||||
test: Optional[str]
|
test: Optional[str] = None
|
||||||
judge_protocol = JudgeProtocol
|
judge_protocol: JudgeProtocol
|
||||||
|
|
||||||
|
|
||||||
class ProblemResult(APIModel):
|
class ProblemResult(APIModel):
|
||||||
@@ -232,3 +232,8 @@ class RanklistRow(APIModel):
|
|||||||
unsuccessful_hack_count: int
|
unsuccessful_hack_count: int
|
||||||
problem_result: List[ProblemResult]
|
problem_result: List[ProblemResult]
|
||||||
last_submission_time_seconds: int
|
last_submission_time_seconds: int
|
||||||
|
|
||||||
|
|
||||||
|
class Sample(BaseModel):
    """A single sample test: input text paired with its output text."""

    # Text of the sample input.
    s_in: str
    # Text of the matching sample output.
    s_out: str
|
||||||
|
@@ -3,11 +3,10 @@ import requests
|
|||||||
from requests import Session
|
from requests import Session
|
||||||
from bs4 import BeautifulSoup as bs
|
from bs4 import BeautifulSoup as bs
|
||||||
|
|
||||||
from codeforces_scraper.utils import get_token, get_messages, create_jar
|
from codeforces_scraper.utils import get_token, get_messages, create_jar, unfuck_multitest_sample
|
||||||
from codeforces_scraper.models import Submission, Problem
|
from codeforces_scraper.models import Submission, Problem, Sample
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from functools import reduce
|
|
||||||
|
|
||||||
BASE_URL = 'https://codeforces.com'
|
BASE_URL = 'https://codeforces.com'
|
||||||
|
|
||||||
@@ -21,7 +20,7 @@ class MessagedScrapError(ScraperError):
|
|||||||
self.codeforces_message = codeforces_message
|
self.codeforces_message = codeforces_message
|
||||||
|
|
||||||
def __str__(self):
    """Return the error text, including the message Codeforces sent back."""
    # The formatted message must be returned: a bare f-string expression is
    # evaluated and thrown away, leaving ``__str__`` to return ``None``.
    return f'Codeforces returned message, which is not considered as good: {self.codeforces_message}'
|
||||||
|
|
||||||
|
|
||||||
class CodeforcesAPIException(ScraperError):
|
class CodeforcesAPIException(ScraperError):
|
||||||
@@ -115,8 +114,10 @@ class Scraper:
|
|||||||
raise ScraperError('Submitting while not logged in')
|
raise ScraperError('Submitting while not logged in')
|
||||||
url = f'contest/{contest_id}/submit'
|
url = f'contest/{contest_id}/submit'
|
||||||
submit_page_response = self.get(url)
|
submit_page_response = self.get(url)
|
||||||
for message in get_messages(submit_page_response):
|
# FIXME: Now some pornography is in the messages, which is not displayed and
|
||||||
raise MessagedScrapError(message)
|
# is not an error
|
||||||
|
# for message in get_messages(submit_page_response):
|
||||||
|
# raise MessagedScrapError(message)
|
||||||
token = get_token(submit_page_response)
|
token = get_token(submit_page_response)
|
||||||
payload = {
|
payload = {
|
||||||
'csrf_token': token,
|
'csrf_token': token,
|
||||||
@@ -145,18 +146,6 @@ class Scraper:
|
|||||||
}
|
}
|
||||||
self.post(url, data=payload)
|
self.post(url, data=payload)
|
||||||
|
|
||||||
def scrap_submissions(self, contest_id: int) -> List[Submission]:
    """Scrape the highlighted rows from the ``contest/{contest_id}/my`` page.

    Collects every ``tr.highlighted-row`` found in the ``tbody`` of each
    ``table.status-frame-datatable`` on the page.

    NOTE(review): despite the return annotation, the items returned are the
    parsed ``<tr>`` elements, not ``Submission`` models.

    Raises:
        ScraperError: if no user is logged in.
    """
    if self.current_user is None:
        raise ScraperError('Submitting while not logged in')
    url = f'contest/{contest_id}/my'
    page_response = self.get(url)
    soup = bs(page_response.text, 'lxml')
    tables = soup.find_all('table', attrs={'class': 'status-frame-datatable'})
    tbodys = [table.find('tbody') for table in tables]
    # Flatten with a comprehension instead of reduce(): reduce() without an
    # initial value raises TypeError when the page has no status table.
    return [
        row
        for tbody in tbodys
        for row in tbody.find_all('tr', attrs={'class': 'highlighted-row'})
    ]
|
|
||||||
|
|
||||||
def get_submission_source(self, contest_id: int, submission_id: int) -> str:
|
def get_submission_source(self, contest_id: int, submission_id: int) -> str:
|
||||||
"""Get source code of submission by ``contest_id`` and ``submission_id``"""
|
"""Get source code of submission by ``contest_id`` and ``submission_id``"""
|
||||||
url = f'contest/{contest_id}/submission/{submission_id}'
|
url = f'contest/{contest_id}/submission/{submission_id}'
|
||||||
@@ -180,7 +169,7 @@ class Scraper:
|
|||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
params = {'contestId': contest_id}
|
params = {'contestId': contest_id}
|
||||||
return [Submission.parse_obj(x) for x in self.api_request('contest.status', params)]
|
return [Submission.model_validate(x) for x in self.api_request('contest.status', params)]
|
||||||
|
|
||||||
def get_contest_tasks(self, contest_id: int) -> List[Problem]:
|
def get_contest_tasks(self, contest_id: int) -> List[Problem]:
|
||||||
"""Get all tasks in contest with id ``contest_id``"""
|
"""Get all tasks in contest with id ``contest_id``"""
|
||||||
@@ -190,6 +179,16 @@ class Scraper:
|
|||||||
}
|
}
|
||||||
return self.api_request('contest.standings', params)['problems']
|
return self.api_request('contest.standings', params)['problems']
|
||||||
|
|
||||||
|
def get_samples(self, contest_id: int, problem_index: str) -> List[Sample]:
    """Scrape the sample tests of a problem from its statement page.

    Fetches ``contest/{contest_id}/problem/{problem_index}``, takes the
    first ``sample-test`` element inside ``sample-tests`` and pairs each
    ``input`` block with the ``output`` block at the same position.

    Inputs are passed through ``unfuck_multitest_sample`` as serialized
    ``<pre>`` markup; outputs are read with ``get_text()``.

    Raises:
        ScraperError: if the page contains no sample section.
    """
    url = f'contest/{contest_id}/problem/{problem_index}'
    page_response = self.get(url)
    soup = bs(page_response.text, 'lxml')

    sample_tests = soup.find(attrs={'class': 'sample-tests'})
    samples = None
    if sample_tests is not None:
        samples = sample_tests.find(attrs={'class': 'sample-test'})
    if samples is None:
        # Fail with the package's own error instead of an AttributeError
        # raised from calling .find() on None.
        raise ScraperError(f'No sample tests found on page {url}')

    inputs = [
        unfuck_multitest_sample(str(div_input.find(name='pre')))
        for div_input in samples.find_all(attrs={'class': 'input'})
    ]
    # attrs must be a dict mapping attribute name to value; a set literal
    # ({'class', 'output'}) is not an attribute filter.
    outputs = [
        div_output.find(name='pre').get_text()
        for div_output in samples.find_all(attrs={'class': 'output'})
    ]
    return [Sample(s_in=s_in, s_out=s_out) for s_in, s_out in zip(inputs, outputs)]
|
||||||
|
|
||||||
def get(self, sub_url='', **kwargs):
|
def get(self, sub_url='', **kwargs):
|
||||||
"""Make a GET request to BASE_URL"""
|
"""Make a GET request to BASE_URL"""
|
||||||
url = self.base_url + '/' + sub_url
|
url = self.base_url + '/' + sub_url
|
||||||
@@ -208,7 +207,13 @@ class Scraper:
|
|||||||
|
|
||||||
def api_request(self, method: str, params):
    """Make a request to Codeforces API with ``params``

    Issues a single GET to ``api/{method}`` and returns the ``result``
    member of the decoded JSON response.

    Raises:
        ScraperError: if the response body is not valid JSON.
        CodeforcesAPIException: if the response has ``status`` equal to
            ``'FAILED'``; the exception carries the response ``comment``.
    """
    resp = self.get(f'api/{method}', params=params)
    try:
        response = resp.json()
    except ValueError as err:
        # This has already happened in practice: the API was switched off
        # and HTML was returned instead of JSON.
        raise ScraperError('API returned invalid JSON') from err
    if response['status'] == 'FAILED':
        raise CodeforcesAPIException(response['comment'])
    return response['result']
|
||||||
|
@@ -8,6 +8,15 @@ MESSAGE_GREP_STRING = r'Codeforces\.showMessage\('
|
|||||||
# TODO: Grep for Codeforces.showMessage(" to find message, that has been sent
|
# TODO: Grep for Codeforces.showMessage(" to find message, that has been sent
|
||||||
|
|
||||||
|
|
||||||
|
def unfuck_multitest_sample(sample_input: str) -> str:
    """Turn the serialized ``<pre>`` element of a sample input into plain text.

    The markup may wrap every line of the sample in its own
    ``<div class="...">``. Opening wrappers are dropped, each closing
    ``</div>`` (and each ``<br>``) becomes a newline, the surrounding
    ``<pre>`` tags are removed and HTML entities are decoded.

    Args:
        sample_input: HTML source of the ``<pre>`` element.

    Returns:
        The sample input as plain text.
    """
    # Local import keeps this helper self-contained.
    from html import unescape

    # Opening per-line wrappers carry no text of their own.
    text = re.sub(r'<div class="[a-zA-Z0-9 -]*">', '', sample_input)
    # Each closing wrapper terminates one line of the sample.
    text = re.sub(r'</div>', '\n', text)
    # Explicit line breaks are newlines as well.
    text = re.sub(r'<br\s*/?>', '\n', text)
    # Strip the enclosing <pre> tags, with or without attributes.
    text = re.sub(r'</?pre(?:\s[^>]*)?>', '', text)
    # Decode entities last so escaped text such as "&lt;pre&gt;" is never
    # mistaken for markup by the substitutions above.
    return unescape(text)
|
||||||
|
|
||||||
|
|
||||||
def create_jar(str_cookie: str):
|
def create_jar(str_cookie: str):
|
||||||
cookies = str_cookie.split(';')
|
cookies = str_cookie.split(';')
|
||||||
d = {}
|
d = {}
|
||||||
@@ -27,6 +36,7 @@ def get_token(response: Response) -> str:
|
|||||||
return token
|
return token
|
||||||
|
|
||||||
|
|
||||||
|
# FIXME: More robust way to find messages
|
||||||
def get_messages(response: Response) -> List[str]:
    """Extract the double-quoted strings that follow ``MESSAGE_GREP_STRING``.

    Scans ``response.text`` and returns, for every match of
    ``MESSAGE_GREP_STRING`` immediately followed by a double-quoted string,
    the text between the quotes (non-greedy, up to the next quote).
    """
    pattern = fr'{MESSAGE_GREP_STRING}\"(.+?)\"'
    return re.findall(pattern, response.text)
|
||||||
|
2
setup.py
2
setup.py
@@ -2,7 +2,7 @@ import setuptools
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='codeforces-scraper',
|
name='codeforces-scraper',
|
||||||
version='0.1.0',
|
version='0.4.0',
|
||||||
author='thematdev',
|
author='thematdev',
|
||||||
author_email='thematdev@thematdev.org',
|
author_email='thematdev@thematdev.org',
|
||||||
description='Utility to do actions on codeforces',
|
description='Utility to do actions on codeforces',
|
||||||
|
Reference in New Issue
Block a user