{corona-virus-intro}

This is post will help you to export Statsbomb free data to BigQuery. Before you even look at the first line of code go to Statsbomb Resource Center and sign StatsBomb Public Data User Agreement. If you don’t do that, feds will do a sweep.

Classic

from google.cloud import bigquery import os import requests import json from io import StringIO

This is some longass cell but schema is quite important here. First of all you can see how well BQ handles JSONs and what’s actually more important, autogenerated schema is not really reliable. Also few useful functions to make working with BQ easier.

SCHEMA = [ { "mode" : "NULLABLE" , "name" : "competition_id" , "type" : "INTEGER" }, { "mode" : "NULLABLE" , "name" : "season_id" , "type" : "INTEGER" }, { "mode" : "NULLABLE" , "name" : "country_name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "competition_name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "season_name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "match_id" , "type" : "INTEGER" }, { "mode" : "NULLABLE" , "name" : "match_date" , "type" : "DATE" }, { "mode" : "REPEATED" , "name" : "location" , "type" : "FLOAT" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "position" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "player" , "type" : "RECORD" }, { "fields" : [ { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "body_part" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "type" , "type" : "RECORD" }, { "mode" : "REPEATED" , "name" : "end_location" , "type" : "FLOAT" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "outcome" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "height" , "type" : "RECORD" }, { "mode" : "NULLABLE" , "name" : "angle" , "type" : "FLOAT" }, { "mode" : "NULLABLE" , "name" : "length" , "type" : "FLOAT" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "recipient" , "type" : "RECORD" }, { "mode" : "NULLABLE" , "name" : "switch" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "aerial_won" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "cross" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "deflected" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "assisted_shot_id" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "shot_assist" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "goal_assist" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "through_ball" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "outswinging" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "inswinging" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "no_touch" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "cut_back" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "miscommunication" , "type" : "BOOLEAN" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "technique" , "type" : "RECORD" }, { "mode" : "NULLABLE" , "name" : "straight" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "out" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "backheel" , "type" : "BOOLEAN" } ], "mode" : "NULLABLE" , "name" : "pass" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "team" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "possession_team" , "type" : "RECORD" }, { "mode" : "NULLABLE" , "name" : "possession" , "type" : "INTEGER" }, { "mode" : "NULLABLE" , "name" : "second" , "type" : "INTEGER" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "play_pattern" , "type" : "RECORD" }, { "mode" : "NULLABLE" , "name" : "minute" , "type" : "INTEGER" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "type" , "type" : "RECORD" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "timestamp" , "type" : "TIME" }, { "mode" : "NULLABLE" , "name" : "period" , "type" : "INTEGER" }, { "mode" : "REPEATED" , "name" : "related_events" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "off_camera" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "out" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "counterpress" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "duration" , "type" : "FLOAT" }, { "mode" : "NULLABLE" , "name" : "index" , "type" : "INTEGER" }, { "fields" : [ { "fields" : [ { "mode" : "NULLABLE" , "name" : "jersey_number" , "type" : "INTEGER" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "position" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "player" , "type" : "RECORD" } ], "mode" : "REPEATED" , "name" : "lineup" , "type" : "RECORD" }, { "mode" : "NULLABLE" , "name" : "formation" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "tactics" , "type" : "RECORD" }, { "fields" : [ { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "outcome" , "type" : "RECORD" } ], "mode" : "NULLABLE" , "name" : "ball_receipt" , "type" : "RECORD" }, { "mode" : "NULLABLE" , "name" : "under_pressure" , "type" : "BOOLEAN" }, { "fields" : [ { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "body_part" , "type" : "RECORD" }, { "mode" : "NULLABLE" , "name" : "head" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "left_foot" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "right_foot" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "out" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "aerial_won" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "other" , "type" : "BOOLEAN" } ], "mode" : "NULLABLE" , "name" : "clearance" , "type" : "RECORD" }, { "fields" : [ { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "type" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "outcome" , "type" : "RECORD" } ], "mode" : "NULLABLE" , "name" : "duel" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "out" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "aerial_won" , "type" : "BOOLEAN" } ], "mode" : "NULLABLE" , "name" : "miscontrol" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "out" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "recovery_failure" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "offensive" , "type" : "BOOLEAN" } ], "mode" : "NULLABLE" , "name" : "ball_recovery" , "type" : "RECORD" }, { "fields" : [ { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "outcome" , "type" : "RECORD" } ], "mode" : "NULLABLE" , "name" : "interception" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "out" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "deflection" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "offensive" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "save_block" , "type" : "BOOLEAN" } ], "mode" : "NULLABLE" , "name" : "block" , "type" : "RECORD" }, { "fields" : [ { "fields" : [ { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "position" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "player" , "type" : "RECORD" }, { "mode" : "NULLABLE" , "name" : "teammate" , "type" : "BOOLEAN" }, { "mode" : "REPEATED" , "name" : "location" , "type" : "FLOAT" } ], "mode" : "REPEATED" , "name" : "freeze_frame" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "body_part" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "type" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "technique" , "type" : "RECORD" }, { "mode" : "NULLABLE" , "name" : "key_pass_id" , "type" : "STRING" }, { "mode" : "REPEATED" , "name" : "end_location" , "type" : "FLOAT" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "outcome" , "type" : "RECORD" }, { "mode" : "NULLABLE" , "name" : "statsbomb_xg" , "type" : "FLOAT" }, { "mode" : "NULLABLE" , "name" : "open_goal" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "first_time" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "deflected" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "one_on_one" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "aerial_won" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "out" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "saved_off_target" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "follows_dribble" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "saved_to_post" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "redirect" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "kick_off" , "type" : "BOOLEAN" } ], "mode" : "NULLABLE" , "name" : "shot" , "type" : "RECORD" }, { "fields" : [ { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "position" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "type" , "type" : "RECORD" }, { "mode" : "REPEATED" , "name" : "end_location" , "type" : "FLOAT" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "technique" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "outcome" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "body_part" , "type" : "RECORD" }, { "mode" : "NULLABLE" , "name" : "out" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "punched_out" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "shot_saved_off_target" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "shot_saved_to_post" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "success_in_play" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "saved_to_post" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "lost_out" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "success_out" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "lost_in_play" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "penalty_saved_to_post" , "type" : "BOOLEAN" } ], "mode" : "NULLABLE" , "name" : "goalkeeper" , "type" : "RECORD" }, { "fields" : [ { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "outcome" , "type" : "RECORD" }, { "mode" : "NULLABLE" , "name" : "nutmeg" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "out" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "overrun" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "no_touch" , "type" : "BOOLEAN" } ], "mode" : "NULLABLE" , "name" : "dribble" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "advantage" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "defensive" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "offensive" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "penalty" , "type" : "BOOLEAN" } ], "mode" : "NULLABLE" , "name" : "foul_won" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "offensive" , "type" : "BOOLEAN" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "type" , "type" : "RECORD" }, { "mode" : "NULLABLE" , "name" : "advantage" , "type" : "BOOLEAN" }, { "mode" : "NULLABLE" , "name" : "penalty" , "type" : "BOOLEAN" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "card" , "type" : "RECORD" } ], "mode" : "NULLABLE" , "name" : "foul_committed" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "late_video_start" , "type" : "BOOLEAN" } ], "mode" : "NULLABLE" , "name" : "half_start" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "early_video_end" , "type" : "BOOLEAN" } ], "mode" : "NULLABLE" , "name" : "half_end" , "type" : "RECORD" }, { "fields" : [ { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "replacement" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "outcome" , "type" : "RECORD" } ], "mode" : "NULLABLE" , "name" : "substitution" , "type" : "RECORD" }, { "fields" : [ { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "card" , "type" : "RECORD" } ], "mode" : "NULLABLE" , "name" : "bad_behaviour" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "in_chain" , "type" : "BOOLEAN" } ], "mode" : "NULLABLE" , "name" : "injury_stoppage" , "type" : "RECORD" }, { "fields" : [ { "fields" : [ { "mode" : "NULLABLE" , "name" : "name" , "type" : "STRING" }, { "mode" : "NULLABLE" , "name" : "id" , "type" : "INTEGER" } ], "mode" : "NULLABLE" , "name" : "outcome" , "type" : "RECORD" }, { "mode" : "NULLABLE" , "name" : "out" , "type" : "BOOLEAN" } ], "mode" : "NULLABLE" , "name" : "_50_50" , "type" : "RECORD" }, { "fields" : [ { "mode" : "NULLABLE" , "name" : "permanent" , "type" : "BOOLEAN" } ], "mode" : "NULLABLE" , "name" : "player_off" , "type" : "RECORD" }, { "fields" : [ { "mode" : "REPEATED" , "name" : "end_location" , "type" : "FLOAT" } ], "mode" : "NULLABLE" , "name" : "carry" , "type" : "RECORD" } ] def _get_field_schema (field): name = field[ 'name' ] field_type = field . get( 'type' , 'STRING' ) mode = field . get( 'mode' , 'NULLABLE' ) fields = field . get( 'fields' , []) if fields: subschema = [] for f in fields: fields_res = _get_field_schema(f) subschema . append(fields_res) else : subschema = [] field_schema = bigquery . SchemaField(name = name, field_type = field_type, mode = mode, fields = subschema ) return field_schema def parse_bq_json_schema (schema_variable): schema = [] schema_variable for field in schema_variable: schema . append(_get_field_schema(field)) return schema def if_tbl_exists (client, table_ref): from google.cloud.exceptions import NotFound try : client . get_table(table_ref) return True except NotFound: return False

To run this cell you need to create Service Account for your Google Cloud. I’m not going to explain how to do this.Go here. Or use google (or duck duck go if you care about privacy but then you won’t like GCP much).

Code below authenticate your notebook to access BQ. You can query, remove tables, datasets, add new rows. Lots of stuff. We just create dataset (first check if one exists - rows 8 to 13). Define table name (row 15) and options of your import (rows 18 and 19)

# Construct a BigQuery client object. os . environ[ "GOOGLE_APPLICATION_CREDENTIALS" ] = os . path . normpath( "sa.json" ) client = bigquery . Client() dataset_id = "{}.statsbomb" . format(client . project) dataset = bigquery . Dataset(dataset_id) dataset . location = "US" try : dataset = client . get_dataset(dataset) print ( " {}.{} exists" . format(client . project, dataset . dataset_id)) except : dataset = client . create_dataset(dataset) print ( "Created dataset {}.{}" . format(client . project, dataset . dataset_id)) table_id = "free_data" #your table name table = dataset . table(table_id) job_config = bigquery . LoadJobConfig() job_config . source_format = bigquery . SourceFormat . NEWLINE_DELIMITED_JSON job_config . schema = parse_bq_json_schema(SCHEMA)

deductive-motif-114121.statsbomb exists

Here we getting list of all competitions.

competition_url = "https://raw.githubusercontent.com/statsbomb/open-data/master/data/competitions.json" r = requests . get(competition_url) competitions = r . json()

Here we getting all free matches.

all_matches = [] for i in competitions: url = "https://raw.githubusercontent.com/statsbomb/open-data/master/data/matches/{comp_id}/{season_id}.json" . format( comp_id = i[ 'competition_id' ], season_id = i[ 'season_id' ]) r = requests . get(url) matches = r . json() for j in matches: all_matches . append(j)

This list stores all processed match ids. Just in case (schema may change, then we need to make update, so it’s good to know what broke the upload).

processed_events = []

All the magic happens here. We loop over all matches, get the data, assign match_id and match_date (not avaible in api call). Change the name of one of the colums (in BQ column name can not begins with number - row 11,12).

Rows 14, 15 and 16, are weirdly looking but BQ accepts only new line delimited json. More info. Then string is converted to bytes and we can upload it as file.

for i in all_matches: if i[ 'match_id' ] not in processed_events: match_events = [] url = "https://raw.githubusercontent.com/statsbomb/open-data/master/data/events/{match_id}.json" . format( match_id = i[ 'match_id' ]) r = requests . get(url) events = r . json() for event in events: event[ 'match_id' ] = i[ 'match_id' ] event[ 'match_date' ] = i[ 'match_date' ] event[ "competition_id" ] = i[ 'competition' ][ 'competition_id' ] event[ "country_name" ] = i[ 'competition' ][ 'country_name' ] event[ "competition_name" ] = i[ 'competition' ][ 'competition_name' ] event[ "season_id" ] = i[ 'season' ][ 'season_id' ] event[ "season_name" ] = i[ 'season' ][ 'season_name' ] for key in list(event): event[ "_50_50" ] = event . pop(key) if key == "50_50" else None match_events . append(event) result = [json . dumps(record) for record in match_events] result = '

' . join(result) data_as_file = StringIO(result) job = client . load_table_from_file(data_as_file, table, job_config = job_config) job . result() # Waits for table load to complete. processed_events . append(i[ 'match_id' ])

But why to even bother. To easily connect BQ to dashboarding tool. Below example of how to create view that can be connected to data studio.

viewSQL = """ SELECT player.name,count(distinct id) quality_shots, COUNT(DISTINCT IF(shot.outcome.name="Goal", id,null)) goals_from_quality_shots FROM `deductive-motif-114121.statsbomb.free_data` where competition_name = "FA Women's Super League" and season_name ="2019/2020" and shot.statsbomb_xg >0.2 group by 1 """ table = dataset . table(view_name) table . view_query = viewSQL table . create()

Not pretty but easy to make (10 clicks) dashboard.