Home Archives Search Feed Football Squares How To Use


import urllib2
from bs4 import BeautifulSoup
from bs4 import Comment


# Schedule page that is scanned for 'Boxscore' links.
baseball = "https://www.baseball-reference.com/leagues/MLB-schedule.shtml"
page = urllib2.urlopen(baseball)
soup = BeautifulSoup(page, 'html.parser')

# Absolute URL for every anchor on the page whose text is exactly 'Boxscore'.
mlb_19 = [
    "https://www.baseball-reference.com" + str(link['href'])
    for link in soup.findAll('a', href=True, text='Boxscore')
]
def get_PA(url):
    """Return the plate-appearance totals found on one box score page.

    Downloads ``url``, scans the HTML comments on the page for those that
    contain the text 'Team Totals', parses each such comment as markup and
    collects the text of the ``data-stat="PA"`` cell of every ``<tfoot>``
    inside it.

    Args:
        url: Address of the page to download.

    Returns:
        A list of strings (the raw cell text), in the order the cells were
        encountered. Empty if no matching cell is found.
    """
    page = urllib2.urlopen(url)
    try:
        soup = BeautifulSoup(page, 'html.parser')
    finally:
        # The response is fully read by the parser; release the connection
        # even if parsing raises.
        page.close()

    ans = []
    for c in soup.find_all(string=lambda text: isinstance(text, Comment)):
        if 'Team Totals' not in c:
            continue
        # The comment text itself is treated as HTML and parsed separately.
        commentsoup = BeautifulSoup(c, 'lxml')
        for f in commentsoup.find_all('tfoot'):
            vv = f.find('td', {'data-stat': 'PA'})
            if vv is not None:
                ans.append(vv.get_text())
    return ans
# Gather the plate-appearance strings of every game into one flat list.
PA = []
for game in mlb_19:
    PA.extend(get_PA(game))
pa_int = [int(x) for x in PA]
# Label the entries alternately 'A', 'H'.
# NOTE(review): this assumes each game contributed exactly two values, in
# away-then-home order -- confirm against the pages being scraped.
# Floor division keeps the repeat count an int even under true division
# (Python 3 or `from __future__ import division`), where `/` would yield a
# float and list repetition would raise TypeError.
H_A = ['A', 'H'] * (len(PA) // 2)
import pandas as pd

# Two-column frame: integer plate appearances and the matching 'A'/'H' label.
plate_appear = pd.DataFrame({'PA': pa_int, 'H_A': H_A})
# Mean PA over the rows labeled 'H'.
plate_appear[plate_appear['H_A']=='H']['PA'].mean()
37.288686605981795
# Mean PA over the rows labeled 'A'.
plate_appear[plate_appear['H_A']=='A']['PA'].mean()
39.022106631989594
import matplotlib.pyplot as plt

%matplotlib inline
# Histogram of the PA column for all rows, using 25 bins.
plate_appear.hist(column='PA',bins=25)
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000000000F752588>]], dtype=object)

png

# Fraction of 'A' rows with PA >= 46: count of matching rows divided by the
# number of 'A' rows. float() forces true division on the count.
# NOTE(review): the reason for the 46 cutoff is not stated here -- confirm.
float((plate_appear[plate_appear['H_A']=='A']['PA']>=46).sum())/plate_appear[plate_appear['H_A']=='A'].shape[0]
0.11313394018205461
# Fraction of 'H' rows with PA >= 48: count of matching rows divided by the
# number of 'H' rows. float() forces true division on the count.
# NOTE(review): the reason for the 48 cutoff (vs. 46 for 'A') is not stated
# here -- confirm.
float((plate_appear[plate_appear['H_A']=='H']['PA']>=48).sum())/plate_appear[plate_appear['H_A']=='H'].shape[0]
0.02600780234070221
# Split the frame by label so each group can be plotted on its own.
away = plate_appear[plate_appear['H_A']=='A']
home = plate_appear[plate_appear['H_A']=='H']
# Histogram of PA for the 'A' rows only, 25 bins.
away.hist(column='PA',bins=25)
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000000000F5530F0>]], dtype=object)

png

# Histogram of PA for the 'H' rows only, 25 bins.
home.hist(column='PA',bins=25)
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000000000F457080>]], dtype=object)

png

# Complement of 0.75 raised to the 6th power.
# NOTE(review): presumably the chance of at least one success in 6
# independent tries at a 0.25 success rate each -- confirm intent.
1-(.750**6)
0.822021484375
# List of names; how it is used is not shown in this file.
# Spellings are kept exactly as originally typed.
guys = ['arenado',
        'blackman',
        'freeman',
        'lemaheiu',
        'segura',
        'betts',
        'turner',
        # The comma after this entry was missing, which silently fused it
        # with the next string into a single element.
        'rameriez, h',
        'meadows']
Posted on 6/4/2019






← Next post