diff --git a/codeforces_scraper/models.py b/codeforces_scraper/models.py index 665b2ab..d3d1861 100644 --- a/codeforces_scraper/models.py +++ b/codeforces_scraper/models.py @@ -232,3 +232,8 @@ class RanklistRow(APIModel): unsuccessful_hack_count: int problem_result: List[ProblemResult] last_submission_time_seconds: int + + +class Sample(BaseModel): + s_in: str + s_out: str diff --git a/codeforces_scraper/scraper.py b/codeforces_scraper/scraper.py index cd2866f..9c37621 100644 --- a/codeforces_scraper/scraper.py +++ b/codeforces_scraper/scraper.py @@ -3,8 +3,8 @@ import requests from requests import Session from bs4 import BeautifulSoup as bs -from codeforces_scraper.utils import get_token, get_messages, create_jar -from codeforces_scraper.models import Submission, Problem +from codeforces_scraper.utils import get_token, get_messages, create_jar, unfuck_multitest_sample +from codeforces_scraper.models import Submission, Problem, Sample from typing import List @@ -62,7 +62,7 @@ class Scraper: if self.current_user is not None: raise ScraperError('Failed to logout!') return - + def get_csrf_token(self): """Get csrf token, which is needed to make requests by hand @@ -114,8 +114,10 @@ class Scraper: raise ScraperError('Submitting while not logged in') url = f'contest/{contest_id}/submit' submit_page_response = self.get(url) - for message in get_messages(submit_page_response): - raise MessagedScrapError(message) + # FIXME: The page now contains junk messages that are never displayed and + # are not errors, so this check is disabled for now + # for message in get_messages(submit_page_response): + # raise MessagedScrapError(message) token = get_token(submit_page_response) payload = { 'csrf_token': token, @@ -177,6 +179,16 @@ class Scraper: } return self.api_request('contest.standings', params)['problems'] + def get_samples(self, contest_id: int, problem_index: str) -> List[Sample]: + url = f'contest/{contest_id}/problem/{problem_index}' + page_response = self.get(url) + soup = bs(page_response.text, 
'lxml') + samples = soup.find(attrs={'class': 'sample-tests'}).find(attrs={'class': 'sample-test'}) + inputs = [unfuck_multitest_sample(str(div_input.find(name='pre'))) + for div_input in samples.find_all(attrs={'class': 'input'})] + outputs = [div_output.find(name='pre').get_text() for div_output in samples.find_all(attrs={'class': 'output'})] + return [Sample(s_in=s_in, s_out=s_out) for (s_in, s_out) in zip(inputs, outputs)] + def get(self, sub_url='', **kwargs): """Make a GET request to BASE_URL""" url = self.base_url + '/' + sub_url diff --git a/codeforces_scraper/utils.py b/codeforces_scraper/utils.py index 6300fdd..cfd1298 100644 --- a/codeforces_scraper/utils.py +++ b/codeforces_scraper/utils.py @@ -8,6 +8,15 @@ MESSAGE_GREP_STRING = r'Codeforces\.showMessage\(' # TODO: Grep for Codeforces.showMessage(" to find message, that has been sent +def unfuck_multitest_sample(sample_input: str) -> str: + div_class_regex = '
' + sample_input = re.sub(div_class_regex, '', sample_input) + sample_input = re.sub('
', '\n', sample_input) + sample_input = re.sub('
', '', sample_input)
+    sample_input = re.sub('
', '', sample_input) + return sample_input + + def create_jar(str_cookie: str): cookies = str_cookie.split(';') d = {} @@ -27,6 +36,7 @@ def get_token(response: Response) -> str: return token +# FIXME: More robust way to find messages def get_messages(response: Response) -> List[str]: text = response.text return re.findall(fr'{MESSAGE_GREP_STRING}\"(.+?)\"', text) diff --git a/setup.py b/setup.py index 17bae76..42345c8 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ import setuptools setuptools.setup( name='codeforces-scraper', - version='0.3.0', + version='0.3.1', author='thematdev', author_email='thematdev@thematdev.org', description='Utility to do actions on codeforces',