Skip to content

Commit b3627f7

Browse files
authored
Release v4.4 (#93)
* Adds play-by-play stats scraping for pages like https://www.basketball-reference.com/boxscores/pbp/201810160BOS.html (#86)
* Rewrites internal writing interface (#89)
* Updates `README` to reflect new `play_by_play` client method and adds `Contributors` section (#94)
2 parents 947eb40 + f41e796 commit b3627f7

24 files changed

+1062
-301
lines changed

README.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,26 @@ client.players_advanced_season_totals(season_end_year=2018)
133133
# The players_advanced_season_totals method also supports all output behavior previously described
134134
```
135135

136+
### Get play-by-play data for a game
137+
138+
The arguments of this method mirror the unique URL pattern that Basketball Reference uses for play-by-play data,
139+
which depends on the date of the game and the home team.
140+
141+
Example: `https://www.basketball-reference.com/boxscores/pbp/201810160BOS.html`
142+
143+
```python
144+
from basketball_reference_web_scraper import client
145+
from basketball_reference_web_scraper.data import Team
146+
147+
# Get play-by-play data for 2018-10-16 game played at the Boston Celtics
148+
play_by_play = client.play_by_play(
149+
home_team=Team.BOSTON_CELTICS,
150+
year=2018,
151+
month=10,
152+
day=16,
153+
)
154+
```
155+
136156
## Development
137157

138158
There are currently two supported major versions - `V3` and `V4`.
@@ -142,3 +162,7 @@ when making changes.
142162

143163
`master` will reflect the latest major version branch.
144164

165+
## Contributors
166+
167+
Thanks to [@DaiJunyan](https://github.com/DaiJunyan) for their contributions!
168+

basketball_reference_web_scraper/client.py

Lines changed: 55 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
import requests
22

33
from basketball_reference_web_scraper import http_client
4-
54
from basketball_reference_web_scraper.errors import InvalidSeason, InvalidDate
6-
from basketball_reference_web_scraper.output import box_scores_to_csv, schedule_to_csv, players_season_totals_to_csv, players_advanced_season_totals_to_csv, team_box_scores_to_csv
75
from basketball_reference_web_scraper.output import output
8-
from basketball_reference_web_scraper.json_encoders import BasketballReferenceJSONEncoder
6+
from basketball_reference_web_scraper.writers import CSVWriter, RowFormatter, \
7+
BOX_SCORE_COLUMN_NAMES, SCHEDULE_COLUMN_NAMES, PLAYER_SEASON_TOTALS_COLUMN_NAMES, \
8+
PLAYER_ADVANCED_SEASON_TOTALS_COLUMN_NAMES, TEAM_BOX_SCORES_COLUMN_NAMES, PLAY_BY_PLAY_COLUMN_NAMES
99

1010

11-
def player_box_scores(day, month, year, output_type=None, output_file_path=None, output_write_option=None, json_options=None):
11+
def player_box_scores(day, month, year, output_type=None, output_file_path=None, output_write_option=None,
12+
json_options=None):
1213
try:
1314
values = http_client.player_box_scores(day=day, month=month, year=year)
1415
except requests.exceptions.HTTPError as http_error:
@@ -21,13 +22,16 @@ def player_box_scores(day, month, year, output_type=None, output_file_path=None,
2122
output_type=output_type,
2223
output_file_path=output_file_path,
2324
output_write_option=output_write_option,
24-
csv_writer=box_scores_to_csv,
25-
encoder=BasketballReferenceJSONEncoder,
25+
csv_writer=CSVWriter(
26+
column_names=BOX_SCORE_COLUMN_NAMES,
27+
row_formatter=RowFormatter(data_field_names=BOX_SCORE_COLUMN_NAMES)
28+
),
2629
json_options=json_options,
2730
)
2831

2932

30-
def season_schedule(season_end_year, output_type=None, output_file_path=None, output_write_option=None, json_options=None):
33+
def season_schedule(season_end_year, output_type=None, output_file_path=None, output_write_option=None,
34+
json_options=None):
3135
try:
3236
values = http_client.season_schedule(season_end_year)
3337
except requests.exceptions.HTTPError as http_error:
@@ -41,13 +45,16 @@ def season_schedule(season_end_year, output_type=None, output_file_path=None, ou
4145
output_type=output_type,
4246
output_file_path=output_file_path,
4347
output_write_option=output_write_option,
44-
csv_writer=schedule_to_csv,
45-
encoder=BasketballReferenceJSONEncoder,
48+
csv_writer=CSVWriter(
49+
column_names=SCHEDULE_COLUMN_NAMES,
50+
row_formatter=RowFormatter(data_field_names=SCHEDULE_COLUMN_NAMES)
51+
),
4652
json_options=json_options,
4753
)
4854

4955

50-
def players_season_totals(season_end_year, output_type=None, output_file_path=None, output_write_option=None, json_options=None):
56+
def players_season_totals(season_end_year, output_type=None, output_file_path=None, output_write_option=None,
57+
json_options=None):
5158
try:
5259
values = http_client.players_season_totals(season_end_year)
5360
except requests.exceptions.HTTPError as http_error:
@@ -60,13 +67,16 @@ def players_season_totals(season_end_year, output_type=None, output_file_path=No
6067
output_type=output_type,
6168
output_file_path=output_file_path,
6269
output_write_option=output_write_option,
63-
csv_writer=players_season_totals_to_csv,
64-
encoder=BasketballReferenceJSONEncoder,
70+
csv_writer=CSVWriter(
71+
column_names=PLAYER_SEASON_TOTALS_COLUMN_NAMES,
72+
row_formatter=RowFormatter(data_field_names=PLAYER_SEASON_TOTALS_COLUMN_NAMES)
73+
),
6574
json_options=json_options,
6675
)
6776

6877

69-
def players_advanced_season_totals(season_end_year, output_type=None, output_file_path=None, output_write_option=None, json_options=None):
78+
def players_advanced_season_totals(season_end_year, output_type=None, output_file_path=None, output_write_option=None,
79+
json_options=None):
7080
try:
7181
values = http_client.players_advanced_season_totals(season_end_year)
7282
except requests.exceptions.HTTPError as http_error:
@@ -79,13 +89,16 @@ def players_advanced_season_totals(season_end_year, output_type=None, output_fil
7989
output_type=output_type,
8090
output_file_path=output_file_path,
8191
output_write_option=output_write_option,
82-
csv_writer=players_advanced_season_totals_to_csv,
83-
encoder=BasketballReferenceJSONEncoder,
92+
csv_writer=CSVWriter(
93+
column_names=PLAYER_ADVANCED_SEASON_TOTALS_COLUMN_NAMES,
94+
row_formatter=RowFormatter(data_field_names=PLAYER_ADVANCED_SEASON_TOTALS_COLUMN_NAMES)
95+
),
8496
json_options=json_options,
8597
)
8698

8799

88-
def team_box_scores(day, month, year, output_type=None, output_file_path=None, output_write_option=None, json_options=None):
100+
def team_box_scores(day, month, year, output_type=None, output_file_path=None, output_write_option=None,
101+
json_options=None):
89102
try:
90103
values = http_client.team_box_scores(day=day, month=month, year=year)
91104
except requests.exceptions.HTTPError as http_error:
@@ -98,7 +111,31 @@ def team_box_scores(day, month, year, output_type=None, output_file_path=None, o
98111
output_type=output_type,
99112
output_file_path=output_file_path,
100113
output_write_option=output_write_option,
101-
csv_writer=team_box_scores_to_csv,
102-
encoder=BasketballReferenceJSONEncoder,
114+
csv_writer=CSVWriter(
115+
column_names=TEAM_BOX_SCORES_COLUMN_NAMES,
116+
row_formatter=RowFormatter(data_field_names=TEAM_BOX_SCORES_COLUMN_NAMES)
117+
),
118+
json_options=json_options,
119+
)
120+
121+
122+
def play_by_play(home_team, day, month, year, output_type=None, output_file_path=None, output_write_option=None,
                 json_options=None):
    """Fetch play-by-play data for one game and hand it to ``output``.

    The data is retrieved with ``http_client.play_by_play`` and then passed,
    together with the output options and a CSV writer configured with
    ``PLAY_BY_PLAY_COLUMN_NAMES``, to ``output``.

    :param home_team: forwarded to ``http_client.play_by_play``
    :param day: day of the game
    :param month: month of the game
    :param year: year of the game
    :param output_type: forwarded to ``output``
    :param output_file_path: forwarded to ``output``
    :param output_write_option: forwarded to ``output``
    :param json_options: forwarded to ``output``
    :return: whatever ``output`` returns
    :raises InvalidDate: when the HTTP request fails with a "not found" status
    :raises requests.exceptions.HTTPError: for any other HTTP failure
    """
    try:
        values = http_client.play_by_play(home_team=home_team, day=day, month=month, year=year)
    except requests.exceptions.HTTPError as http_error:
        page_missing = http_error.response.status_code == requests.codes.not_found
        if not page_missing:
            # Anything other than "not found" is propagated unchanged.
            raise http_error
        raise InvalidDate(day=day, month=month, year=year)

    # The same column list drives both the CSV header and the row formatting.
    play_by_play_writer = CSVWriter(
        column_names=PLAY_BY_PLAY_COLUMN_NAMES,
        row_formatter=RowFormatter(data_field_names=PLAY_BY_PLAY_COLUMN_NAMES)
    )
    return output(
        values=values,
        output_type=output_type,
        output_file_path=output_file_path,
        output_write_option=output_write_option,
        csv_writer=play_by_play_writer,
        json_options=json_options,
    )

basketball_reference_web_scraper/data.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,11 @@ class Position(Enum):
7474
GUARD = "GUARD"
7575

7676

77+
class PeriodType(Enum):
    """Enumerates the period types: quarter and overtime.

    Each member's value is the same string as its name.
    """

    QUARTER = "QUARTER"
    OVERTIME = "OVERTIME"
80+
81+
7782
TEAM_ABBREVIATIONS_TO_TEAM = {
7883
'ATL': Team.ATLANTA_HAWKS,
7984
'BOS': Team.BOSTON_CELTICS,
@@ -116,6 +121,9 @@ class Position(Enum):
116121
'VAN': Team.VANCOUVER_GRIZZLIES,
117122
}
118123

124+
# Reverse lookup built by inverting TEAM_ABBREVIATIONS_TO_TEAM. If several
# abbreviations map to the same team, the one that appears last in that
# mapping is the one kept here.
TEAM_TO_TEAM_ABBREVIATION = {
    team: abbreviation
    for abbreviation, team in TEAM_ABBREVIATIONS_TO_TEAM.items()
}
# Explicit override applied after the inversion.
# NOTE(review): presumably "CHO" is the abbreviation Basketball Reference uses
# in URLs for the Charlotte Hornets -- confirm against the site.
TEAM_TO_TEAM_ABBREVIATION[Team.CHARLOTTE_HORNETS] = "CHO"
126+
119127
TEAM_NAME_TO_TEAM = {
120128
"ATLANTA HAWKS": Team.ATLANTA_HAWKS,
121129
"BOSTON CELTICS": Team.BOSTON_CELTICS,

basketball_reference_web_scraper/http_client.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
import requests
22

3+
from basketball_reference_web_scraper.data import TEAM_TO_TEAM_ABBREVIATION
34
from basketball_reference_web_scraper.errors import InvalidDate
4-
from basketball_reference_web_scraper.parsers.box_scores.players import parse_player_box_scores
55
from basketball_reference_web_scraper.parsers.box_scores.games import parse_game_url_paths
6+
from basketball_reference_web_scraper.parsers.play_by_play import parse_play_by_plays
7+
from basketball_reference_web_scraper.parsers.box_scores.players import parse_player_box_scores
68
from basketball_reference_web_scraper.parsers.box_scores.teams import parse_team_totals
7-
from basketball_reference_web_scraper.parsers.schedule import parse_schedule, parse_schedule_for_month_url_paths
8-
from basketball_reference_web_scraper.parsers.players_season_totals import parse_players_season_totals
99
from basketball_reference_web_scraper.parsers.players_advanced_season_totals import parse_players_advanced_season_totals
10+
from basketball_reference_web_scraper.parsers.players_season_totals import parse_players_season_totals
11+
from basketball_reference_web_scraper.parsers.schedule import parse_schedule, parse_schedule_for_month_url_paths
1012

1113
BASE_URL = 'https://www.basketball-reference.com'
1214

@@ -108,3 +110,17 @@ def team_box_scores(day, month, year):
108110
for game_url_path in game_url_paths
109111
for box_score in team_box_score(game_url_path=game_url_path)
110112
]
113+
114+
115+
def play_by_play(home_team, day, month, year):
    """Download and parse the play-by-play page for a single game.

    The page URL has the form
    ``{BASE_URL}/boxscores/pbp/{year}{month}{day}0{team_abbr}.html`` where
    ``month`` and ``day`` are left-padded with zeros to two characters and
    ``team_abbr`` is looked up in ``TEAM_TO_TEAM_ABBREVIATION``.

    :param home_team: key into ``TEAM_TO_TEAM_ABBREVIATION``; also passed to
        ``parse_play_by_plays``
    :param day: day of the game
    :param month: month of the game
    :param year: year of the game (inserted into the URL without padding)
    :return: the result of ``parse_play_by_plays`` for the response content
    :raises KeyError: if ``home_team`` is not in ``TEAM_TO_TEAM_ABBREVIATION``
    :raises requests.exceptions.HTTPError: if the response status indicates
        an error (via ``response.raise_for_status()``)
    """
    # The hard-coded `0` in the URL assumes we always take the first game
    # played on the given date at the given home team.
    url = "{BASE_URL}/boxscores/pbp/{year}{month}{day}0{team_abbr}.html".format(
        BASE_URL=BASE_URL,
        year=year,
        # str.zfill replaces the hand-rolled "prepend 0 if one character" lambda.
        month=str(month).zfill(2),
        day=str(day).zfill(2),
        team_abbr=TEAM_TO_TEAM_ABBREVIATION[home_team],
    )
    response = requests.get(url=url)
    response.raise_for_status()
    return parse_play_by_plays(response.content, home_team)

basketball_reference_web_scraper/json_encoders.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from datetime import datetime
2-
from json import JSONEncoder
32
from enum import Enum
3+
from json import JSONEncoder
44

55

66
class BasketballReferenceJSONEncoder(JSONEncoder):

0 commit comments

Comments
 (0)