Skip to content

Commit 8878db3

Browse files
authored
Implement standings api method (#203)
Closes #198
1 parent dc29930 commit 8878db3

File tree

13 files changed

+1074
-15
lines changed

13 files changed

+1074
-15
lines changed

basketball_reference_web_scraper/client.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,37 @@
55
from basketball_reference_web_scraper.output.columns import BOX_SCORE_COLUMN_NAMES, SCHEDULE_COLUMN_NAMES, \
66
PLAYER_SEASON_TOTALS_COLUMN_NAMES, \
77
PLAYER_ADVANCED_SEASON_TOTALS_COLUMN_NAMES, TEAM_BOX_SCORES_COLUMN_NAMES, PLAY_BY_PLAY_COLUMN_NAMES, \
8-
PLAYER_SEASON_BOX_SCORE_COLUMN_NAMES, SEARCH_RESULTS_COLUMN_NAMES
8+
PLAYER_SEASON_BOX_SCORE_COLUMN_NAMES, SEARCH_RESULTS_COLUMN_NAMES, STANDINGS_COLUMNS_NAMES
99
from basketball_reference_web_scraper.output.fields import format_value, BasketballReferenceJSONEncoder
1010
from basketball_reference_web_scraper.output.service import OutputService
11-
from basketball_reference_web_scraper.output.writers import CSVWriter, JSONWriter, FileOptions, OutputOptions, SearchCSVWriter
11+
from basketball_reference_web_scraper.output.writers import CSVWriter, JSONWriter, FileOptions, OutputOptions, \
12+
SearchCSVWriter
1213
from basketball_reference_web_scraper.parser_service import ParserService
1314

1415

16+
def standings(season_end_year, output_type=None, output_file_path=None, output_write_option=None,
              json_options=None):
    """Fetch the standings for a season and pass them to the output service.

    :param season_end_year: year in which the requested season ends
    :param output_type: forwarded to ``OutputOptions.of``
    :param output_file_path: path forwarded to ``FileOptions.of``
    :param output_write_option: file mode forwarded to ``FileOptions.of``
    :param json_options: forwarded to ``OutputOptions.of``
    :return: whatever ``OutputService.output`` returns for the parsed standings
    :raises InvalidSeason: when the standings request is answered with a 404
    :raises requests.exceptions.HTTPError: for any other HTTP error status
    """
    try:
        values = HTTPService(parser=ParserService()).standings(season_end_year=season_end_year)
    except requests.exceptions.HTTPError as http_error:
        # Only a "not found" response is translated into a domain error;
        # every other HTTP failure is passed on unchanged.
        if http_error.response.status_code != requests.codes.not_found:
            raise http_error
        raise InvalidSeason(season_end_year=season_end_year)

    file_options = FileOptions.of(path=output_file_path, mode=output_write_option)
    options = OutputOptions.of(
        file_options=file_options,
        output_type=output_type,
        json_options=json_options,
        csv_options={"column_names": STANDINGS_COLUMNS_NAMES}
    )
    writers = OutputService(
        json_writer=JSONWriter(value_formatter=BasketballReferenceJSONEncoder),
        csv_writer=CSVWriter(value_formatter=format_value)
    )
    return writers.output(data=values, options=options)
37+
38+
1539
def player_box_scores(day, month, year, output_type=None, output_file_path=None, output_write_option=None,
1640
json_options=None):
1741
try:

basketball_reference_web_scraper/data.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,31 @@ class League(Enum):
8585
BASKETBALL_ASSOCIATION_OF_AMERICA = "BASKETBALL_ASSOCIATION_OF_AMERICA"
8686

8787

88+
class Conference(Enum):
    """Conferences that a division can belong to."""
    EASTERN = "EASTERN"
    WESTERN = "WESTERN"


class Division(Enum):
    """Division names recognised when parsing standings tables."""
    ATLANTIC = "ATLANTIC"
    CENTRAL = "CENTRAL"
    MIDWEST = "MIDWEST"
    NORTHWEST = "NORTHWEST"
    PACIFIC = "PACIFIC"
    SOUTHEAST = "SOUTHEAST"
    SOUTHWEST = "SOUTHWEST"


# Conference lookup for every Division member. Consumers resolve the
# conference with ``.get(division)``, so a division missing from this mapping
# silently yields ``None`` -- keep it exhaustive.
DIVISIONS_TO_CONFERENCES = {
    Division.ATLANTIC: Conference.EASTERN,
    Division.CENTRAL: Conference.EASTERN,
    Division.SOUTHEAST: Conference.EASTERN,
    Division.MIDWEST: Conference.WESTERN,
    Division.NORTHWEST: Conference.WESTERN,
    Division.PACIFIC: Conference.WESTERN,
    Division.SOUTHWEST: Conference.WESTERN,
}
111+
112+
88113
TEAM_ABBREVIATIONS_TO_TEAM = {
89114
'ATL': Team.ATLANTA_HAWKS,
90115
'BOS': Team.BOSTON_CELTICS,

basketball_reference_web_scraper/html.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,7 @@ def rows(self):
716716
for row_html in self.html.xpath(self.rows_query)
717717
]
718718

719+
719720
class PlayerSeasonBoxScoresRow(PlayerBoxScoreRow):
720721
def __init__(self, html):
721722
super().__init__(html)
@@ -1128,3 +1129,101 @@ def totals_table(self):
11281129
return PlayerPageTotalsTable(html=totals_tables[0])
11291130

11301131
return None
1132+
1133+
1134+
class StandingsPage:
    """Wrapper around the parsed HTML of a season's standings page."""

    def __init__(self, html):
        self.html = html

    @property
    def division_standings(self):
        """Return the standings section, or ``None`` unless exactly one is found."""
        matches = self.html.xpath('.//div[@id="all_standings"]')

        if len(matches) != 1:
            return None

        return DivisionStandings(html=matches[0])
1146+
1147+
1148+
class DivisionStandings:
    """Wrapper around the page section holding both conference standings tables."""

    def __init__(self, html):
        self.html = html

    def _single_table(self, query):
        """Wrap the table matched by ``query``; ``None`` unless exactly one matches."""
        found = self.html.xpath(query)

        if len(found) != 1:
            return None

        return ConferenceDivisionStandingsTable(html=found[0])

    @property
    def eastern_conference_table(self):
        """Return the table with id ``divs_standings_E``, if present exactly once."""
        return self._single_table('.//table[@id="divs_standings_E"]')

    @property
    def western_conference_table(self):
        """Return the table with id ``divs_standings_W``, if present exactly once."""
        return self._single_table('.//table[@id="divs_standings_W"]')
1169+
1170+
1171+
class ConferenceDivisionStandingsTable:
    """Wrapper around one conference's standings ``<table>`` element."""

    def __init__(self, html):
        self.html = html

    @property
    def rows(self):
        """Return every ``tbody`` row of the table wrapped as a standings row."""
        wrapped_rows = []
        for row_html in self.html.xpath('.//tbody/tr'):
            wrapped_rows.append(ConferenceDivisionStandingsRow(html=row_html))
        return wrapped_rows
1181+
1182+
1183+
class ConferenceDivisionStandingsRow:
    """Wrapper around a single ``<tr>`` of a conference standings table.

    A row is either a division heading (``class="thead"``) or a team's
    standings line (``class="full_table"``). Rows with any other class, or
    with no ``class`` attribute at all, are reported as neither.
    """

    def __init__(self, html):
        self.html = html

    def _row_class(self):
        """Return the row's ``class`` attribute, or ``None`` when it is absent."""
        # ``.get`` rather than indexing: a row without a class attribute must
        # answer False to both row-type checks instead of raising KeyError.
        return self.html.attrib.get('class')

    def _single_cell_text(self, query):
        """Return the text of the cell matched by ``query``.

        ``None`` is returned unless the query matches exactly one cell.
        """
        cells = self.html.xpath(query)

        if len(cells) == 1:
            return cells[0].text_content()

        return None

    @property
    def is_division_name_row(self):
        """Whether this row is a division heading."""
        return self._row_class() == 'thead'

    @property
    def is_standings_row(self):
        """Whether this row holds a team's standings."""
        return self._row_class() == 'full_table'

    @property
    def division_name(self):
        """Text of the row's only ``<th>`` cell, or ``None``."""
        return self._single_cell_text('.//th')

    @property
    def team_name(self):
        """Text of the ``team_name`` header cell, or ``None``."""
        return self._single_cell_text('.//th[@data-stat="team_name"]')

    @property
    def wins(self):
        """Text of the ``wins`` cell, or ``None``."""
        return self._single_cell_text('.//td[@data-stat="wins"]')

    @property
    def losses(self):
        """Text of the ``losses`` cell, or ``None``."""
        return self._single_cell_text('.//td[@data-stat="losses"]')

basketball_reference_web_scraper/http_service.py

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from basketball_reference_web_scraper.errors import InvalidDate, InvalidPlayerAndSeason
66
from basketball_reference_web_scraper.html import DailyLeadersPage, PlayerSeasonBoxScoresPage, PlayerSeasonTotalTable, \
77
PlayerAdvancedSeasonTotalsTable, PlayByPlayPage, SchedulePage, BoxScoresPage, DailyBoxScoresPage, SearchPage, \
8-
PlayerPage
8+
PlayerPage, StandingsPage
99

1010

1111
class HTTPService:
@@ -14,6 +14,20 @@ class HTTPService:
1414
def __init__(self, parser):
1515
self.parser = parser
1616

17+
def standings(self, season_end_year):
    """Download a season's league page and parse its division standings.

    :param season_end_year: year in which the requested season ends
    :return: list with the parsed eastern-table rows followed by the parsed
        western-table rows; a section or table that the page does not
        contain contributes nothing, so the list may be empty
    :raises requests.exceptions.HTTPError: when the response has an error status
    """
    url = '{BASE_URL}/leagues/NBA_{season_end_year}.html'.format(
        BASE_URL=HTTPService.BASE_URL,
        season_end_year=season_end_year,
    )

    response = requests.get(url=url, allow_redirects=False)

    response.raise_for_status()

    page = StandingsPage(html=html.fromstring(response.content))

    # The page and section accessors return None when the expected element is
    # not found exactly once, so each level is checked before reading ``rows``.
    division_standings = page.division_standings
    if division_standings is None:
        return []

    conference_tables = (
        division_standings.eastern_conference_table,
        division_standings.western_conference_table,
    )
    results = []
    for table in conference_tables:
        if table is not None:
            results.extend(self.parser.parse_division_standings(standings=table.rows))
    return results
30+
1731
def player_box_scores(self, day, month, year):
1832
url = '{BASE_URL}/friv/dailyleaders.cgi?month={month}&day={day}&year={year}'.format(
1933
BASE_URL=HTTPService.BASE_URL,
@@ -38,11 +52,11 @@ def regular_season_player_box_scores(self, player_identifier, season_end_year):
3852
# starting with first few characters of player's surname
3953
url = '{BASE_URL}/players/{player_surname_starting_character}/{player_identifier}/gamelog/{season_end_year}' \
4054
.format(
41-
BASE_URL=HTTPService.BASE_URL,
42-
player_surname_starting_character=player_identifier[0],
43-
player_identifier=player_identifier,
44-
season_end_year=season_end_year,
45-
)
55+
BASE_URL=HTTPService.BASE_URL,
56+
player_surname_starting_character=player_identifier[0],
57+
player_identifier=player_identifier,
58+
season_end_year=season_end_year,
59+
)
4660

4761
response = requests.get(url=url, allow_redirects=False)
4862
response.raise_for_status()
@@ -59,11 +73,11 @@ def playoff_player_box_scores(self, player_identifier, season_end_year):
5973
# starting with first few characters of player's surname
6074
url = '{BASE_URL}/players/{player_surname_starting_character}/{player_identifier}/gamelog/{season_end_year}' \
6175
.format(
62-
BASE_URL=HTTPService.BASE_URL,
63-
player_surname_starting_character=player_identifier[0],
64-
player_identifier=player_identifier,
65-
season_end_year=season_end_year,
66-
)
76+
BASE_URL=HTTPService.BASE_URL,
77+
player_surname_starting_character=player_identifier[0],
78+
player_identifier=player_identifier,
79+
season_end_year=season_end_year,
80+
)
6781

6882
response = requests.get(url=url, allow_redirects=False)
6983
response.raise_for_status()

basketball_reference_web_scraper/output/columns.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,3 +128,11 @@
128128
"identifier",
129129
"leagues",
130130
]
131+
132+
# Column names passed as the CSV ``column_names`` option for standings output.
STANDINGS_COLUMNS_NAMES = [
    "team",
    "wins",
    "losses",
    "division",
    "conference",
]

# Alias spelled like the other ``*_COLUMN_NAMES`` constants; the original
# ``STANDINGS_COLUMNS_NAMES`` spelling is kept so existing imports still work.
STANDINGS_COLUMN_NAMES = STANDINGS_COLUMNS_NAMES

basketball_reference_web_scraper/parser_service.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
from basketball_reference_web_scraper.data import TEAM_ABBREVIATIONS_TO_TEAM, LOCATION_ABBREVIATIONS_TO_POSITION, OUTCOME_ABBREVIATIONS_TO_OUTCOME, TEAM_NAME_TO_TEAM, \
2-
POSITION_ABBREVIATIONS_TO_POSITION, LEAGUE_ABBREVIATIONS_TO_LEAGUE
2+
POSITION_ABBREVIATIONS_TO_POSITION, LEAGUE_ABBREVIATIONS_TO_LEAGUE, Division, Team, DIVISIONS_TO_CONFERENCES
33
from basketball_reference_web_scraper.parsers import PositionAbbreviationParser, TeamAbbreviationParser, \
44
PlayerSeasonTotalsParser, TeamTotalsParser, LocationAbbreviationParser, OutcomeAbbreviationParser, \
55
SecondsPlayedParser, PlayerBoxScoresParser, PlayerAdvancedSeasonTotalsParser, PeriodDetailsParser, \
66
PeriodTimestampParser, ScoresParser, PlayByPlaysParser, TeamNameParser, ScheduledStartTimeParser, \
77
ScheduledGamesParser, PlayerBoxScoreOutcomeParser, PlayerSeasonBoxScoresParser, SearchResultNameParser, \
8-
ResourceLocationParser, SearchResultsParser, LeagueAbbreviationParser, PlayerDataParser
8+
ResourceLocationParser, SearchResultsParser, LeagueAbbreviationParser, PlayerDataParser, DivisionNameParser, \
9+
TeamStandingsParser, ConferenceDivisionStandingsParser
910

1011

1112
class ParserService:
@@ -76,6 +77,16 @@ def __init__(self):
7677
league_abbreviation_parser=self.league_abbreviation_parser,
7778
)
7879
self.team_totals_parser = TeamTotalsParser(team_abbreviation_parser=self.team_abbreviation_parser)
80+
self.division_name_parser = DivisionNameParser(divisions=Division)
81+
self.team_standings_parser = TeamStandingsParser(teams=Team)
82+
self.conference_division_standings_parser = ConferenceDivisionStandingsParser(
83+
division_name_parser=self.division_name_parser,
84+
team_standings_parser=self.team_standings_parser,
85+
divisions_to_conferences=DIVISIONS_TO_CONFERENCES,
86+
)
87+
88+
def parse_division_standings(self, standings):
    """Delegate the given standings rows to the conference/division standings parser."""
    standings_parser = self.conference_division_standings_parser
    return standings_parser.parse(division_standings=standings)
7990

8091
def parse_play_by_plays(self, play_by_plays, away_team_name, home_team_name):
8192
return self.play_by_plays_parser.parse(

basketball_reference_web_scraper/parsers.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,30 @@ def parse_resource_identifier(self, resource_location):
258258
return self.search(resource_location=resource_location).group(self.resource_identifier_regex_group_name)
259259

260260

261+
class TeamStandingsParser:
    """Resolves the team name shown in a standings row to a member of ``teams``."""

    def __init__(self, teams):
        self.teams = teams

    def parse_team(self, formatted_name):
        """Return the first team whose value is a prefix of the given name.

        Matching is case-insensitive and ignores surrounding whitespace. A
        prefix match is used, so any text after the team name is tolerated
        (presumably markers the site appends to the name -- confirm).

        :param formatted_name: team name as displayed, or ``None``
        :return: the matching member of ``teams``, or ``None`` when the name
            is ``None`` or matches no team
        """
        if formatted_name is None:
            # The row reports None when it has no team-name cell.
            return None

        # Normalise once instead of on every loop iteration.
        normalized_name = formatted_name.strip().upper()
        return next(
            (team for team in self.teams if normalized_name.startswith(team.value)),
            None,
        )
271+
272+
273+
class DivisionNameParser:
    """Resolves a division heading such as "Atlantic Division" to a member of ``divisions``."""

    def __init__(self, divisions):
        self.divisions = divisions

    def parse_division(self, formatted_name):
        """Return the division whose heading equals the given name.

        The comparison is case-insensitive, ignores surrounding whitespace and
        expects the form ``"<DIVISION VALUE> DIVISION"``.

        :param formatted_name: heading text as displayed, or ``None``
        :return: the matching member of ``divisions``, or ``None`` when the
            name is ``None`` or matches no division
        """
        if formatted_name is None:
            # The row reports None when it has no single heading cell.
            return None

        # Normalise once instead of on every loop iteration.
        normalized_name = formatted_name.strip().upper()
        for division in self.divisions:
            if normalized_name == "{division} DIVISION".format(division=division.value):
                return division

        return None
283+
284+
261285
class ScheduledGamesParser:
262286
def __init__(self, start_time_parser, team_name_parser):
263287
self.start_time_parser = start_time_parser
@@ -552,3 +576,26 @@ def parse(self, player):
552576
)
553577
)
554578
}
579+
580+
581+
class ConferenceDivisionStandingsParser:
    """Turns the rows of a conference standings table into standings records."""

    def __init__(self, division_name_parser, team_standings_parser, divisions_to_conferences):
        self.division_name_parser = division_name_parser
        self.team_standings_parser = team_standings_parser
        self.divisions_to_conferences = divisions_to_conferences

    def parse(self, division_standings):
        """Build one record per team row, tagged with the division heading above it.

        Rows are read in order: a division heading row sets the division that
        applies to the team rows after it, and each team row becomes a dict
        with the keys ``team``, ``wins``, ``losses``, ``division`` and
        ``conference``.

        :param division_standings: iterable of row objects exposing
            ``is_division_name_row``, ``is_standings_row``, ``division_name``,
            ``team_name``, ``wins`` and ``losses``
        :return: list of standings dicts in row order
        """
        current_division = None
        results = []
        for standing in division_standings:
            if standing.is_division_name_row:
                current_division = self.division_name_parser.parse_division(
                    formatted_name=standing.division_name,
                )
            elif standing.is_standings_row:
                # Only genuine team rows are converted; a row that is neither a
                # heading nor a team row is skipped rather than parsed as a team.
                results.append({
                    "team": self.team_standings_parser.parse_team(formatted_name=standing.team_name),
                    "wins": str_to_int(standing.wins),
                    "losses": str_to_int(standing.losses),
                    "division": current_division,
                    "conference": self.divisions_to_conferences.get(current_division),
                })
        return results

docs/api.md

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,3 +551,46 @@ or appended to the specified file path (or any of other the Python file mode opt
551551
)
552552
```
553553

554+
### Standings
555+
556+
* [`repl.it` Examples](https://repl.it/@jaebradley/Standings#main.py)
557+
558+
=== "Python Data Structures"
559+
```python
560+
from basketball_reference_web_scraper import client
561+
562+
client.standings(season_end_year=2019)
563+
```
564+
565+
=== "JSON"
566+
```python
567+
from basketball_reference_web_scraper import client
568+
from basketball_reference_web_scraper.data import OutputType
569+
570+
client.standings(season_end_year=2019, output_type=OutputType.JSON)
571+
```
572+
573+
=== "JSON to file"
574+
```python
575+
from basketball_reference_web_scraper import client
576+
from basketball_reference_web_scraper.data import OutputType
577+
578+
client.standings(
579+
season_end_year=2019,
580+
output_type=OutputType.JSON,
581+
output_file_path="./2019_standings.json"
582+
)
583+
```
584+
585+
=== "CSV to file"
586+
```python
587+
from basketball_reference_web_scraper import client
588+
from basketball_reference_web_scraper.data import OutputType
589+
590+
client.standings(
591+
season_end_year=2019,
592+
output_type=OutputType.CSV,
593+
output_file_path="./2019_standings.csv"
594+
)
595+
```
596+

0 commit comments

Comments
 (0)