Compare commits
4 Commits
Author | SHA1 | Date | |
---|---|---|---|
d0ae823e26 | |||
d3b3e7abbc | |||
1ebef0083d | |||
92e16cc615
|
@@ -71,8 +71,8 @@ class APIModel(BaseModel):
|
||||
|
||||
class JudgeProtocol(APIModel):
|
||||
manual: bool
|
||||
protocol: Optional[str]
|
||||
verdict: Optional[str]
|
||||
protocol: Optional[str] = None
|
||||
verdict: Optional[str] = None
|
||||
|
||||
|
||||
class BlogEntry(APIModel):
|
||||
@@ -81,7 +81,7 @@ class BlogEntry(APIModel):
|
||||
creation_time_seconds: int
|
||||
author_handle: str
|
||||
title: str
|
||||
content: Optional[str]
|
||||
content: Optional[str] = None
|
||||
locale: str
|
||||
modification_time_seconds: int
|
||||
allow_view_history: bool
|
||||
@@ -95,7 +95,7 @@ class Comment(APIModel):
|
||||
commentator_handle: str
|
||||
locale: str
|
||||
text: str
|
||||
parent_comment_id: Optional[int]
|
||||
parent_comment_id: Optional[int] = None
|
||||
rating: int
|
||||
|
||||
|
||||
@@ -126,26 +126,26 @@ class Member(APIModel):
|
||||
|
||||
|
||||
class Problem(APIModel):
|
||||
contest_id: Optional[int]
|
||||
problem_set_name: Optional[str]
|
||||
contest_id: Optional[int] = None
|
||||
problem_set_name: Optional[str] = None
|
||||
index: str
|
||||
name: str
|
||||
type: str
|
||||
points: Optional[float]
|
||||
rating: Optional[int]
|
||||
points: Optional[float] = None
|
||||
rating: Optional[int] = None
|
||||
tags: List[str]
|
||||
|
||||
|
||||
class User(APIModel):
|
||||
handle: str
|
||||
email: Optional[str]
|
||||
vk_id: Optional[str]
|
||||
open_id: Optional[str]
|
||||
first_name: Optional[str]
|
||||
last_name: Optional[str]
|
||||
country: Optional[str]
|
||||
city: Optional[str]
|
||||
organization: Optional[str]
|
||||
email: Optional[str] = None
|
||||
vk_id: Optional[str] = None
|
||||
open_id: Optional[str] = None
|
||||
first_name: Optional[str] = None
|
||||
last_name: Optional[str] = None
|
||||
country: Optional[str] = None
|
||||
city: Optional[str] = None
|
||||
organization: Optional[str] = None
|
||||
contribution: int
|
||||
rank: str
|
||||
rating: int
|
||||
@@ -162,11 +162,11 @@ class Party(APIModel):
|
||||
contest_id: int
|
||||
members: List[Member]
|
||||
participant_type: str
|
||||
team_id: Optional[int]
|
||||
team_name: Optional[str]
|
||||
team_id: Optional[int] = None
|
||||
team_name: Optional[str] = None
|
||||
ghost: bool
|
||||
room: Optional[int]
|
||||
start_time_seconds: Optional[int]
|
||||
room: Optional[int] = None
|
||||
start_time_seconds: Optional[int] = None
|
||||
|
||||
|
||||
class Submission(APIModel):
|
||||
@@ -177,12 +177,12 @@ class Submission(APIModel):
|
||||
problem: Problem
|
||||
author: Party
|
||||
programming_language: str
|
||||
verdict: Optional[Verdict]
|
||||
verdict: Optional[Verdict] = None
|
||||
testset: str
|
||||
passed_test_count: int
|
||||
time_consumed_millis: int
|
||||
memory_consumed_bytes: int
|
||||
points: Optional[float]
|
||||
points: Optional[float] = None
|
||||
|
||||
|
||||
class Contest(APIModel):
|
||||
@@ -192,17 +192,17 @@ class Contest(APIModel):
|
||||
phase: ContestPhase
|
||||
frozen: bool
|
||||
duration_seconds: bool
|
||||
start_time_seconds: Optional[int]
|
||||
relative_time_seconds: Optional[int]
|
||||
prepared_by: Optional[str]
|
||||
website_url: Optional[str]
|
||||
description: Optional[str]
|
||||
difficulty: Optional[int]
|
||||
kind: Optional[str]
|
||||
icpc_region: Optional[str]
|
||||
country: Optional[str]
|
||||
city: Optional[str]
|
||||
season: Optional[str]
|
||||
start_time_seconds: Optional[int] = None
|
||||
relative_time_seconds: Optional[int] = None
|
||||
prepared_by: Optional[str] = None
|
||||
website_url: Optional[str] = None
|
||||
description: Optional[str] = None
|
||||
difficulty: Optional[int] = None
|
||||
kind: Optional[str] = None
|
||||
icpc_region: Optional[str] = None
|
||||
country: Optional[str] = None
|
||||
city: Optional[str] = None
|
||||
season: Optional[str] = None
|
||||
|
||||
|
||||
class Hack(APIModel):
|
||||
@@ -211,8 +211,8 @@ class Hack(APIModel):
|
||||
hacker: Party
|
||||
defender: Party
|
||||
problem: Problem
|
||||
test: Optional[str]
|
||||
judge_protocol = JudgeProtocol
|
||||
test: Optional[str] = None
|
||||
judge_protocol: JudgeProtocol
|
||||
|
||||
|
||||
class ProblemResult(APIModel):
|
||||
@@ -232,3 +232,8 @@ class RanklistRow(APIModel):
|
||||
unsuccessful_hack_count: int
|
||||
problem_result: List[ProblemResult]
|
||||
last_submission_time_seconds: int
|
||||
|
||||
|
||||
class Sample(BaseModel):
    """One sample test of a problem: an input paired with its output."""

    # Text of the sample input.
    s_in: str
    # Text of the sample output that belongs to ``s_in``.
    s_out: str
|
||||
|
@@ -3,8 +3,8 @@ import requests
|
||||
from requests import Session
|
||||
from bs4 import BeautifulSoup as bs
|
||||
|
||||
from codeforces_scraper.utils import get_token, get_messages, create_jar
|
||||
from codeforces_scraper.models import Submission, Problem
|
||||
from codeforces_scraper.utils import get_token, get_messages, create_jar, unfuck_multitest_sample
|
||||
from codeforces_scraper.models import Submission, Problem, Sample
|
||||
from typing import List
|
||||
|
||||
|
||||
@@ -62,7 +62,7 @@ class Scraper:
|
||||
if self.current_user is not None:
|
||||
raise ScraperError('Failed to logout!')
|
||||
return
|
||||
|
||||
|
||||
def get_csrf_token(self):
|
||||
"""Get csrf token, which is needed
|
||||
to make requests by hand
|
||||
@@ -114,8 +114,10 @@ class Scraper:
|
||||
raise ScraperError('Submitting while not logged in')
|
||||
url = f'contest/{contest_id}/submit'
|
||||
submit_page_response = self.get(url)
|
||||
for message in get_messages(submit_page_response):
|
||||
raise MessagedScrapError(message)
|
||||
# FIXME: The submit page now contains junk messages that are not displayed and
|
||||
# are not errors, so raising on every message is disabled for now.
|
||||
# for message in get_messages(submit_page_response):
|
||||
# raise MessagedScrapError(message)
|
||||
token = get_token(submit_page_response)
|
||||
payload = {
|
||||
'csrf_token': token,
|
||||
@@ -167,7 +169,7 @@ class Scraper:
|
||||
}
|
||||
else:
|
||||
params = {'contestId': contest_id}
|
||||
return [Submission.parse_obj(x) for x in self.api_request('contest.status', params)]
|
||||
return [Submission.model_validate(x) for x in self.api_request('contest.status', params)]
|
||||
|
||||
def get_contest_tasks(self, contest_id: int) -> List[Problem]:
|
||||
"""Get all tasks in contest with id ``contest_id``"""
|
||||
@@ -177,6 +179,16 @@ class Scraper:
|
||||
}
|
||||
return self.api_request('contest.standings', params)['problems']
|
||||
|
||||
def get_samples(self, contest_id: int, problem_index: str) -> List[Sample]:
    """Scrape the sample tests from a problem's statement page.

    Fetches ``contest/{contest_id}/problem/{problem_index}``, locates the
    first ``sample-test`` element inside the ``sample-tests`` container and
    pairs every ``input`` block with the ``output`` block at the same
    position.

    Args:
        contest_id: Id of the contest the problem belongs to.
        problem_index: Index of the problem inside the contest, as used in
            the problem URL.

    Returns:
        One ``Sample`` per input/output pair, in page order.

    Raises:
        ScraperError: If the page has no sample-tests section.
    """
    url = f'contest/{contest_id}/problem/{problem_index}'
    page_response = self.get(url)
    soup = bs(page_response.text, 'lxml')

    # Look the two levels up separately so a page without samples produces
    # a clear error instead of an AttributeError on None.
    container = soup.find(attrs={'class': 'sample-tests'})
    samples = None
    if container is not None:
        samples = container.find(attrs={'class': 'sample-test'})
    if samples is None:
        raise ScraperError(f'No sample tests found at {url}')

    # Inputs keep their markup: multitest samples wrap every line in a
    # <div>, which unfuck_multitest_sample turns back into plain lines.
    inputs = [
        unfuck_multitest_sample(str(div_input.find(name='pre')))
        for div_input in samples.find_all(attrs={'class': 'input'})
    ]
    # attrs must be a dict; the previous set literal {'class', 'output'}
    # was a typo for this.
    outputs = [
        div_output.find(name='pre').get_text()
        for div_output in samples.find_all(attrs={'class': 'output'})
    ]
    return [Sample(s_in=s_in, s_out=s_out) for s_in, s_out in zip(inputs, outputs)]
|
||||
|
||||
def get(self, sub_url='', **kwargs):
|
||||
"""Make a GET request to BASE_URL"""
|
||||
url = self.base_url + '/' + sub_url
|
||||
|
@@ -8,6 +8,15 @@ MESSAGE_GREP_STRING = r'Codeforces\.showMessage\('
|
||||
# TODO: Grep for Codeforces.showMessage(" to find the message that has been sent
|
||||
|
||||
|
||||
def unfuck_multitest_sample(sample_input: str) -> str:
    """Convert the HTML of a sample ``<pre>`` element into plain text.

    Multitest samples wrap every input line in ``<div class="...">``; each
    such div becomes one newline-terminated line. ``<br>`` separators become
    newlines as well, the surrounding ``<pre>`` tags are dropped, and HTML
    entities (``&lt;``, ``&gt;``, ``&amp;`` ...) are decoded.

    Args:
        sample_input: Serialized markup of the ``<pre>`` element.

    Returns:
        The sample text with the markup removed.
    """
    import html  # local import: keeps this block independent of the file's import list

    # Opening line wrappers carry no content; the closing tag ends the line.
    sample_input = re.sub(r'<div class="[a-zA-Z0-9 -]*">', '', sample_input)
    sample_input = re.sub(r'</div>', '\n', sample_input)
    # Some statements separate lines with <br> instead of per-line divs.
    sample_input = re.sub(r'<br\s*/?>', '\n', sample_input)
    sample_input = re.sub(r'<pre>', '', sample_input)
    sample_input = re.sub(r'</pre>', '', sample_input)
    # Decode entities last so escaped text such as "&lt;div&gt;" is never
    # mistaken for markup by the substitutions above.
    return html.unescape(sample_input)
|
||||
|
||||
|
||||
def create_jar(str_cookie: str):
|
||||
cookies = str_cookie.split(';')
|
||||
d = {}
|
||||
@@ -27,6 +36,7 @@ def get_token(response: Response) -> str:
|
||||
return token
|
||||
|
||||
|
||||
# FIXME: More robust way to find messages
|
||||
def get_messages(response: Response) -> List[str]:
    """Collect the messages embedded in a response body.

    Searches the response text for every occurrence of
    ``MESSAGE_GREP_STRING`` followed by a double-quoted string and returns
    the quoted contents, in order of appearance. The match is non-greedy,
    so each message ends at the first following double quote.
    """
    pattern = fr'{MESSAGE_GREP_STRING}\"(.+?)\"'
    return re.findall(pattern, response.text)
|
||||
|
Reference in New Issue
Block a user