Compare commits

11 Commits

Author SHA1 Message Date
d0ae823e26 Finally bump pydantic version 2024-04-03 22:25:22 +03:00
d3b3e7abbc Bump version 2024-04-03 21:56:31 +03:00
1ebef0083d Fix critical bug in models.py 2024-04-03 21:54:03 +03:00
92e16cc615 Fetch samples feature + message submit bug fix 2023-08-13 17:52:38 +03:00
3d51e2de8a Split termforces into other package 2023-02-17 18:56:02 +03:00
c6600852ec Update version in setup.py 2023-02-02 21:39:42 +03:00
a1c5e8ab1f Critical fix 2023-02-01 14:32:07 +03:00
1cd9fc859f Added termforces module 2023-02-01 14:16:50 +03:00
9c85c34037 Improve exception handling 2023-01-30 11:30:46 +03:00
thematdev
8d8c8b0625 Merge pull request #1 from ntoneee/patch-1
Add Python code syntax highlighting in README.md
2023-01-24 19:52:29 +00:00
Anton Egorov
aaef99f731 Add Python code syntax highlighting in README.md 2023-01-24 22:48:22 +03:00
5 changed files with 78 additions and 58 deletions

View File

@@ -2,7 +2,7 @@
Codeforces scraper provides a gentle way to interact with Codeforces Codeforces scraper provides a gentle way to interact with Codeforces
``` ```python3
from codeforces_scraper import Scraper from codeforces_scraper import Scraper
from codeforces_scraper.languages import some_compiler_by_ext from codeforces_scraper.languages import some_compiler_by_ext
from getpass import getpass from getpass import getpass

View File

@@ -71,8 +71,8 @@ class APIModel(BaseModel):
class JudgeProtocol(APIModel): class JudgeProtocol(APIModel):
manual: bool manual: bool
protocol: Optional[str] protocol: Optional[str] = None
verdict: Optional[str] verdict: Optional[str] = None
class BlogEntry(APIModel): class BlogEntry(APIModel):
@@ -81,7 +81,7 @@ class BlogEntry(APIModel):
creation_time_seconds: int creation_time_seconds: int
author_handle: str author_handle: str
title: str title: str
content: Optional[str] content: Optional[str] = None
locale: str locale: str
modification_time_seconds: int modification_time_seconds: int
allow_view_history: bool allow_view_history: bool
@@ -95,7 +95,7 @@ class Comment(APIModel):
commentator_handle: str commentator_handle: str
locale: str locale: str
text: str text: str
parent_comment_id: Optional[int] parent_comment_id: Optional[int] = None
rating: int rating: int
@@ -126,26 +126,26 @@ class Member(APIModel):
class Problem(APIModel): class Problem(APIModel):
contest_id: Optional[int] contest_id: Optional[int] = None
problem_set_name: Optional[str] problem_set_name: Optional[str] = None
index: str index: str
name: str name: str
type: str type: str
points: Optional[float] points: Optional[float] = None
rating: Optional[int] rating: Optional[int] = None
tags: List[str] tags: List[str]
class User(APIModel): class User(APIModel):
handle: str handle: str
email: Optional[str] email: Optional[str] = None
vk_id: Optional[str] vk_id: Optional[str] = None
open_id: Optional[str] open_id: Optional[str] = None
first_name: Optional[str] first_name: Optional[str] = None
last_name: Optional[str] last_name: Optional[str] = None
country: Optional[str] country: Optional[str] = None
city: Optional[str] city: Optional[str] = None
organization: Optional[str] organization: Optional[str] = None
contribution: int contribution: int
rank: str rank: str
rating: int rating: int
@@ -162,11 +162,11 @@ class Party(APIModel):
contest_id: int contest_id: int
members: List[Member] members: List[Member]
participant_type: str participant_type: str
team_id: Optional[int] team_id: Optional[int] = None
team_name: Optional[str] team_name: Optional[str] = None
ghost: bool ghost: bool
room: Optional[int] room: Optional[int] = None
start_time_seconds: Optional[int] start_time_seconds: Optional[int] = None
class Submission(APIModel): class Submission(APIModel):
@@ -177,12 +177,12 @@ class Submission(APIModel):
problem: Problem problem: Problem
author: Party author: Party
programming_language: str programming_language: str
verdict: Optional[Verdict] verdict: Optional[Verdict] = None
testset: str testset: str
passed_test_count: int passed_test_count: int
time_consumed_millis: int time_consumed_millis: int
memory_consumed_bytes: int memory_consumed_bytes: int
points: Optional[float] points: Optional[float] = None
class Contest(APIModel): class Contest(APIModel):
@@ -192,17 +192,17 @@ class Contest(APIModel):
phase: ContestPhase phase: ContestPhase
frozen: bool frozen: bool
duration_seconds: bool duration_seconds: bool
start_time_seconds: Optional[int] start_time_seconds: Optional[int] = None
relative_time_seconds: Optional[int] relative_time_seconds: Optional[int] = None
prepared_by: Optional[str] prepared_by: Optional[str] = None
website_url: Optional[str] website_url: Optional[str] = None
description: Optional[str] description: Optional[str] = None
difficulty: Optional[int] difficulty: Optional[int] = None
kind: Optional[str] kind: Optional[str] = None
icpc_region: Optional[str] icpc_region: Optional[str] = None
country: Optional[str] country: Optional[str] = None
city: Optional[str] city: Optional[str] = None
season: Optional[str] season: Optional[str] = None
class Hack(APIModel): class Hack(APIModel):
@@ -211,8 +211,8 @@ class Hack(APIModel):
hacker: Party hacker: Party
defender: Party defender: Party
problem: Problem problem: Problem
test: Optional[str] test: Optional[str] = None
judge_protocol = JudgeProtocol judge_protocol: JudgeProtocol
class ProblemResult(APIModel): class ProblemResult(APIModel):
@@ -232,3 +232,8 @@ class RanklistRow(APIModel):
unsuccessful_hack_count: int unsuccessful_hack_count: int
problem_result: List[ProblemResult] problem_result: List[ProblemResult]
last_submission_time_seconds: int last_submission_time_seconds: int
class Sample(BaseModel):
    """A single sample test of a problem: one input paired with one output."""

    # Text of the sample input.
    s_in: str
    # Text of the sample output.
    s_out: str

View File

@@ -3,11 +3,10 @@ import requests
from requests import Session from requests import Session
from bs4 import BeautifulSoup as bs from bs4 import BeautifulSoup as bs
from codeforces_scraper.utils import get_token, get_messages, create_jar from codeforces_scraper.utils import get_token, get_messages, create_jar, unfuck_multitest_sample
from codeforces_scraper.models import Submission, Problem from codeforces_scraper.models import Submission, Problem, Sample
from typing import List from typing import List
from functools import reduce
BASE_URL = 'https://codeforces.com' BASE_URL = 'https://codeforces.com'
@@ -21,7 +20,7 @@ class MessagedScrapError(ScraperError):
self.codeforces_message = codeforces_message self.codeforces_message = codeforces_message
def __str__(self): def __str__(self):
f'Codeforces returned message, which is not considered as good: {self.codeforces_message}' return f'Codeforces returned message, which is not considered as good: {self.codeforces_message}'
class CodeforcesAPIException(ScraperError): class CodeforcesAPIException(ScraperError):
@@ -115,8 +114,10 @@ class Scraper:
raise ScraperError('Submitting while not logged in') raise ScraperError('Submitting while not logged in')
url = f'contest/{contest_id}/submit' url = f'contest/{contest_id}/submit'
submit_page_response = self.get(url) submit_page_response = self.get(url)
for message in get_messages(submit_page_response): # FIXME: Now some pornography is in the messages, which is not displayed and
raise MessagedScrapError(message) # is not an error
# for message in get_messages(submit_page_response):
# raise MessagedScrapError(message)
token = get_token(submit_page_response) token = get_token(submit_page_response)
payload = { payload = {
'csrf_token': token, 'csrf_token': token,
@@ -145,18 +146,6 @@ class Scraper:
} }
self.post(url, data=payload) self.post(url, data=payload)
def scrap_submissions(self, contest_id: int) -> List[Submission]:
    """Collect the highlighted rows from the status tables of ``contest/{contest_id}/my``.

    Every ``table.status-frame-datatable`` on the page is inspected and the
    ``tr.highlighted-row`` elements of its ``tbody`` are gathered into one
    flat list, in page order. An empty list is returned when the page has
    no such table.

    NOTE(review): the return annotation says ``List[Submission]``, but the
    values returned are the raw BeautifulSoup row elements; they are not
    parsed into ``Submission`` models here.

    Raises:
        ScraperError: if no user is logged in.
    """
    if self.current_user is None:
        raise ScraperError('Scraping submissions while not logged in')
    url = f'contest/{contest_id}/my'
    page_response = self.get(url)
    soup = bs(page_response.text, 'lxml')
    rows = []
    # Flatten with extend() instead of reduce(): reduce() without an initial
    # value raises TypeError when the page contains no status table at all.
    for table in soup.find_all('table', attrs={'class': 'status-frame-datatable'}):
        tbody = table.find('tbody')
        rows.extend(tbody.find_all('tr', attrs={'class': 'highlighted-row'}))
    return rows
def get_submission_source(self, contest_id: int, submission_id: int) -> str: def get_submission_source(self, contest_id: int, submission_id: int) -> str:
"""Get source code of submission by ``contest_id`` and ``submission_id``""" """Get source code of submission by ``contest_id`` and ``submission_id``"""
url = f'contest/{contest_id}/submission/{submission_id}' url = f'contest/{contest_id}/submission/{submission_id}'
@@ -180,7 +169,7 @@ class Scraper:
} }
else: else:
params = {'contestId': contest_id} params = {'contestId': contest_id}
return [Submission.parse_obj(x) for x in self.api_request('contest.status', params)] return [Submission.model_validate(x) for x in self.api_request('contest.status', params)]
def get_contest_tasks(self, contest_id: int) -> List[Problem]: def get_contest_tasks(self, contest_id: int) -> List[Problem]:
"""Get all tasks in contest with id ``contest_id``""" """Get all tasks in contest with id ``contest_id``"""
@@ -190,6 +179,16 @@ class Scraper:
} }
return self.api_request('contest.standings', params)['problems'] return self.api_request('contest.standings', params)['problems']
def get_samples(self, contest_id: int, problem_index: str) -> List[Sample]:
    """Fetch the sample tests shown in the statement of a problem.

    Loads ``contest/{contest_id}/problem/{problem_index}``, locates the
    sample section and pairs its input blocks with its output blocks in
    page order.

    Raises:
        ScraperError: if the page contains no sample section.
    """
    url = f'contest/{contest_id}/problem/{problem_index}'
    page_response = self.get(url)
    soup = bs(page_response.text, 'lxml')
    sample_tests = soup.find(attrs={'class': 'sample-tests'})
    samples = None
    if sample_tests is not None:
        samples = sample_tests.find(attrs={'class': 'sample-test'})
    if samples is None:
        # Without this guard a page lacking samples fails with an opaque
        # AttributeError on None.
        raise ScraperError('No sample tests found on the problem page')
    # Inputs go through the tag's HTML so per-line <div> wrappers can be
    # turned into newlines by unfuck_multitest_sample.
    inputs = [unfuck_multitest_sample(str(div_input.find(name='pre')))
              for div_input in samples.find_all(attrs={'class': 'input'})]
    # attrs must be a dict; the previous set literal {'class', 'output'}
    # only matched because of how non-dict attrs values are interpreted.
    outputs = [div_output.find(name='pre').get_text()
               for div_output in samples.find_all(attrs={'class': 'output'})]
    return [Sample(s_in=s_in, s_out=s_out) for s_in, s_out in zip(inputs, outputs)]
def get(self, sub_url='', **kwargs): def get(self, sub_url='', **kwargs):
"""Make a GET request to BASE_URL""" """Make a GET request to BASE_URL"""
url = self.base_url + '/' + sub_url url = self.base_url + '/' + sub_url
@@ -208,7 +207,13 @@ class Scraper:
def api_request(self, method: str, params): def api_request(self, method: str, params):
"""Make a request to Codeforces API with ``params``""" """Make a request to Codeforces API with ``params``"""
response = self.get(f'api/{method}', params=params).json() resp = self.get(f'api/{method}', params=params)
try:
response = resp.json()
except ValueError:
# It actually had already happened when Mike
# decided to turn off API and return HTML instead
raise ScraperError('API returned invalid JSON')
if response['status'] == 'FAILED': if response['status'] == 'FAILED':
raise CodeforcesAPIException(response['comment']) raise CodeforcesAPIException(response['comment'])
return response['result'] return response['result']

View File

@@ -8,6 +8,15 @@ MESSAGE_GREP_STRING = r'Codeforces\.showMessage\('
# TODO: Grep for Codeforces.showMessage(" to find message, that has been sent # TODO: Grep for Codeforces.showMessage(" to find message, that has been sent
def unfuck_multitest_sample(sample_input: str) -> str:
    """Convert the HTML markup of a sample ``<pre>`` element into plain text.

    Opening ``<div ...>`` tags are dropped, every closing ``</div>`` becomes
    a newline, and the ``<pre>`` / ``</pre>`` tags are removed. HTML entities
    such as ``&lt;`` and ``&amp;`` are then decoded, so the result contains
    the literal characters of the sample rather than their escaped form.

    Args:
        sample_input: HTML source of the ``<pre>`` element holding the sample.

    Returns:
        The sample as plain text.
    """
    # Local import so this function does not depend on the module's import block.
    import html

    # Accept any attributes on the tags, not only a simple class="..." list.
    text = re.sub(r'<div\b[^>]*>', '', sample_input)
    text = text.replace('</div>', '\n')
    text = re.sub(r'</?pre\b[^>]*>', '', text)
    # Decode entities last, so decoded '<' / '>' are never mistaken for tags.
    return html.unescape(text)
def create_jar(str_cookie: str): def create_jar(str_cookie: str):
cookies = str_cookie.split(';') cookies = str_cookie.split(';')
d = {} d = {}
@@ -27,6 +36,7 @@ def get_token(response: Response) -> str:
return token return token
# FIXME: More robust way to find messages
def get_messages(response: Response) -> List[str]: def get_messages(response: Response) -> List[str]:
text = response.text text = response.text
return re.findall(fr'{MESSAGE_GREP_STRING}\"(.+?)\"', text) return re.findall(fr'{MESSAGE_GREP_STRING}\"(.+?)\"', text)

View File

@@ -2,7 +2,7 @@ import setuptools
setuptools.setup( setuptools.setup(
name='codeforces-scraper', name='codeforces-scraper',
version='0.1.0', version='0.4.0',
author='thematdev', author='thematdev',
author_email='thematdev@thematdev.org', author_email='thematdev@thematdev.org',
description='Utility to do actions on codeforces', description='Utility to do actions on codeforces',