FIFA Analysis with Data Science

Founded in 1904 to provide unity among national soccer associations, the Federation Internationale de Football Association (FIFA) boasts 209 members, rivaling that of the United Nations, and is arguably the most prestigious sports organization in the world.

In this Data Science Project we will do some analysis on the matches and records of FIFA with Python.

Let's start by importing the libraries.

```import numpy as np
import pandas as pd

# for visualizations
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()```

```data = pd.read_csv('data.csv')
print(data.shape)```

#Output- (18207, 89)

To check the first 5 rows of the data

`data.head()`

Let’s Look at Indian Footballers

```def country(x):
return data[data['Nationality'] == x][['Name','Overall','Potential','Position']]

# let's check the Indian Players
country('India')```

Analyzing Club Data (Manchester United)

```def club(x):
return data[data['Club'] == x][['Name','Jersey Number','Position','Overall','Nationality','Age','Wage',
'Value','Contract Valid Until']]

club('Manchester United')```
```x = club('Manchester United')
x.shape```

#Output– (33, 9)

Describing the data

`data.describe()`

Filling the missing values so that the data can be visualized properly

```data['ShortPassing'].fillna(data['ShortPassing'].mean(), inplace = True)
data['Volleys'].fillna(data['Volleys'].mean(), inplace = True)
data['Dribbling'].fillna(data['Dribbling'].mean(), inplace = True)
data['Curve'].fillna(data['Curve'].mean(), inplace = True)
data['FKAccuracy'].fillna(data['FKAccuracy'], inplace = True)
data['LongPassing'].fillna(data['LongPassing'].mean(), inplace = True)
data['BallControl'].fillna(data['BallControl'].mean(), inplace = True)
data['Finishing'].fillna(data['Finishing'].mean(), inplace = True)
data['Crossing'].fillna(data['Crossing'].mean(), inplace = True)
data['Weight'].fillna('200lbs', inplace = True)
data['Contract Valid Until'].fillna(2019, inplace = True)
data['Height'].fillna("5'11", inplace = True)
data['Loaned From'].fillna('None', inplace = True)
data['Joined'].fillna('Jul 1, 2018', inplace = True)
data['Jersey Number'].fillna(8, inplace = True)
data['Body Type'].fillna('Normal', inplace = True)
data['Position'].fillna('ST', inplace = True)
data['Club'].fillna('No Club', inplace = True)
data['Work Rate'].fillna('Medium/ Medium', inplace = True)
data['Skill Moves'].fillna(data['Skill Moves'].median(), inplace = True)
data['Weak Foot'].fillna(3, inplace = True)
data['Preferred Foot'].fillna('Right', inplace = True)
data['International Reputation'].fillna(1, inplace = True)
data['Wage'].fillna('â‚¬200K', inplace = True)
data.fillna(0, inplace = True)```
```def defending(data):
return int(round((data[['Marking', 'StandingTackle',
'SlidingTackle']].mean()).mean()))

def general(data):
    """Return the rounded mean of a player's general ball-skill values as an int.

    ``data`` is indexed by column name, e.g. the row Series handed over by
    ``DataFrame.apply(..., axis=1)``.
    """
    # NOTE(review): the original text lost the `return` line and the start of
    # the column list; only 'BallControl' survived. The first three names are
    # a reconstruction -- confirm against the source notebook.
    cols = ['HeadingAccuracy', 'Dribbling', 'Curve', 'BallControl']
    return int(round(data[cols].mean().mean()))

def mental(data):
    """Return the rounded mean of 'Aggression', 'Interceptions', 'Positioning',
    'Vision' and 'Composure' in ``data`` as an int.

    ``data`` is indexed by column name, e.g. the row Series handed over by
    ``DataFrame.apply(..., axis=1)``.
    """
    cols = ['Aggression', 'Interceptions', 'Positioning', 'Vision', 'Composure']
    # For a DataFrame the second .mean() averages the per-column means.
    return int(round(data[cols].mean().mean()))

def passing(data):
    """Return the rounded mean of 'Crossing', 'ShortPassing' and 'LongPassing'
    in ``data`` as an int.

    ``data`` is indexed by column name, e.g. the row Series handed over by
    ``DataFrame.apply(..., axis=1)``.
    """
    cols = ['Crossing', 'ShortPassing', 'LongPassing']
    # For a DataFrame the second .mean() averages the per-column means.
    return int(round(data[cols].mean().mean()))

def mobility(data):
    """Return the rounded mean of 'Acceleration', 'SprintSpeed', 'Agility' and
    'Reactions' in ``data`` as an int.

    ``data`` is indexed by column name, e.g. the row Series handed over by
    ``DataFrame.apply(..., axis=1)``.
    """
    cols = ['Acceleration', 'SprintSpeed', 'Agility', 'Reactions']
    # For a DataFrame the second .mean() averages the per-column means.
    return int(round(data[cols].mean().mean()))
def power(data):
    """Return the rounded mean of 'Balance', 'Jumping', 'Stamina' and
    'Strength' in ``data`` as an int.

    ``data`` is indexed by column name, e.g. the row Series handed over by
    ``DataFrame.apply(..., axis=1)``.
    """
    cols = ['Balance', 'Jumping', 'Stamina', 'Strength']
    # For a DataFrame the second .mean() averages the per-column means.
    return int(round(data[cols].mean().mean()))

def rating(data):
    """Return the rounded mean of 'Potential' and 'Overall' in ``data`` as an int.

    ``data`` is indexed by column name, e.g. the row Series handed over by
    ``DataFrame.apply(..., axis=1)``.
    """
    cols = ['Potential', 'Overall']
    # For a DataFrame the second .mean() averages the per-column means.
    return int(round(data[cols].mean().mean()))

def shooting(data):
return int(round((data[['Finishing', 'Volleys', 'FKAccuracy',
'ShotPower','LongShots', 'Penalties']].mean()).mean()))```

Renaming the columns

```data.rename(columns={'Club Logo':'Club_Logo'}, inplace=True)

# adding these categories to the data

data['Defending'] = data.apply(defending, axis = 1)
data['General'] = data.apply(general, axis = 1)
data['Mental'] = data.apply(mental, axis = 1)
data['Passing'] = data.apply(passing, axis = 1)
data['Mobility'] = data.apply(mobility, axis = 1)
data['Power'] = data.apply(power, axis = 1)
data['Rating'] = data.apply(rating, axis = 1)
data['Shooting'] = data.apply(shooting, axis = 1)```
```players = data[['Name','Defending','General','Mental','Passing',
'Mobility','Power','Rating','Shooting','Flag','Age',
'Nationality', 'Photo', 'Club_Logo', 'Club']]

Data Visualization

Comparison of preferred foot over the different players

```plt.rcParams['figure.figsize'] = (10, 5)
sns.countplot(data['Preferred Foot'], palette = 'pink')
plt.title('Most Preferred Foot of the Players', fontsize = 20)
plt.show()```

Plotting a pie chart to represent share of international reputation

```labels = ['1', '2', '3', '4', '5']
sizes = data['International Reputation'].value_counts()
colors = plt.cm.copper(np.linspace(0, 1, 5))
explode = [0.1, 0.1, 0.2, 0.5, 0.9]

plt.rcParams['figure.figsize'] = (9, 9)
plt.pie(sizes, labels = labels, colors = colors, explode = explode, shadow = True)
plt.title('International Repuatation for the Football Players', fontsize = 20)
plt.legend()
plt.show()```

Different positions acquired by the players

```plt.figure(figsize = (18, 8))
plt.style.use('fivethirtyeight')
ax = sns.countplot('Position', data = data, palette = 'bone')
ax.set_xlabel(xlabel = 'Different Positions in Football', fontsize = 16)
ax.set_ylabel(ylabel = 'Count of Players', fontsize = 16)
ax.set_title(label = 'Comparison of Positions and Players', fontsize = 20)
plt.show()```

Defining a function for cleaning the Weight data

```def extract_value_from(value):
out = value.replace('lbs', '')
return float(out)

# applying the function to weight column
#data['value'] = data['value'].apply(lambda x: extract_value_from(x))
data['Weight'] = data['Weight'].apply(lambda x : extract_value_from(x))

```#Output
0    159.0
1    183.0
2    150.0
3    168.0
4    154.0
Name: Weight, dtype: float64```

Defining a function for cleaning the wage column

```def extract_value_from(Value):
out = Value.replace('â‚¬', '')
if 'M' in out:
out = float(out.replace('M', ''))*1000000
elif 'K' in Value:
out = float(out.replace('K', ''))*1000
return float(out)```

Applying the function to the Value and Wage columns

```data['Value'] = data['Value'].apply(lambda x: extract_value_from(x))
data['Wage'] = data['Wage'].apply(lambda x: extract_value_from(x))

```#Output
0    565000.0
1    405000.0
2    290000.0
3    260000.0
4    355000.0
Name: Wage, dtype: float64```

Comparing the players’ Wages

```import warnings
warnings.filterwarnings('ignore')

plt.rcParams['figure.figsize'] = (15, 5)
sns.distplot(data['Wage'], color = 'blue')
plt.xlabel('Wage Range for Players', fontsize = 16)
plt.ylabel('Count of the Players', fontsize = 16)
plt.title('Distribution of Wages of Players', fontsize = 20)
plt.xticks(rotation = 90)
plt.show()```

Skill Moves of Players

```plt.figure(figsize = (10, 8))
ax = sns.countplot(x = 'Skill Moves', data = data, palette = 'pastel')
ax.set_title(label = 'Count of players on Basis of their skill moves', fontsize = 20)
ax.set_xlabel(xlabel = 'Number of Skill Moves', fontsize = 16)
ax.set_ylabel(ylabel = 'Count', fontsize = 16)
plt.show()```

Height of Players

```plt.figure(figsize = (13, 8))
ax = sns.countplot(x = 'Height', data = data, palette = 'dark')
ax.set_title(label = 'Count of players on Basis of Height', fontsize = 20)
ax.set_xlabel(xlabel = 'Height in Foot per inch', fontsize = 16)
ax.set_ylabel(ylabel = 'Count', fontsize = 16)
plt.show()```

To show Different body weight of the players participating in the FIFA 2019

```plt.figure(figsize = (20, 5))
sns.distplot(data['Weight'], color = 'pink')
plt.title('Different Weights of the Players Participating in FIFA 2019', fontsize = 20)
plt.xlabel('Heights associated with the players', fontsize = 16)
plt.ylabel('count of Players', fontsize = 16)
plt.show()```

To show Different Work rate of the players participating in the FIFA 2019

```plt.figure(figsize = (15, 7))

sns.countplot(x = 'Work Rate', data = data, palette = 'hls')
plt.title('Different work rates of the Players Participating in the FIFA 2019', fontsize = 20)
plt.xlabel('Work rates associated with the players', fontsize = 16)
plt.ylabel('count of Players', fontsize = 16)
plt.show()```

To show Different Speciality Score of the players participating in the FIFA 2019

```x = data.Special
plt.figure(figsize = (12, 8))
plt.style.use('tableau-colorblind10')

ax = sns.distplot(x, bins = 58, kde = False, color = 'm')
ax.set_xlabel(xlabel = 'Special score range', fontsize = 16)
ax.set_ylabel(ylabel = 'Count of the Players',fontsize = 16)
ax.set_title(label = 'Histogram for the Speciality Scores of the Players', fontsize = 20)
plt.show()```

To show Different potential scores of the players participating in the FIFA 2019

```x = data.Potential
plt.figure(figsize=(12,8))
plt.style.use('seaborn-paper')

ax = sns.distplot(x, bins = 58, kde = False, color = 'y')
ax.set_xlabel(xlabel = "Player\'s Potential Scores", fontsize = 16)
ax.set_ylabel(ylabel = 'Number of players', fontsize = 16)
ax.set_title(label = 'Histogram of players Potential Scores', fontsize = 20)
plt.show()```

To show Different nations participating in the FIFA 2019

```plt.style.use('dark_background')
data['Nationality'].value_counts().head(80).plot.bar(color = 'orange', figsize = (20, 7))
plt.title('Different Nations Participating in FIFA 2019', fontsize = 30, fontweight = 20)
plt.xlabel('Name of The Country')
plt.ylabel('count')
plt.show()```

Countries with Most Players

Picking up the countries with highest number of players to compare their overall scores

`data['Nationality'].value_counts().head(8)`
```#Output-
England      1662
Germany      1198
Spain        1072
Argentina     937
France        914
Brazil        827
Italy         702
Colombia      618
Name: Nationality, dtype: int64```

Every Nations’ Player and their Weights

```some_countries = ('England', 'Germany', 'Spain', 'Argentina', 'France', 'Brazil', 'Italy', 'Columbia')
data_countries = data.loc[data['Nationality'].isin(some_countries) & data['Weight']]

plt.rcParams['figure.figsize'] = (15, 7)
ax = sns.violinplot(x = data_countries['Nationality'], y = data_countries['Weight'], palette = 'Reds')
ax.set_xlabel(xlabel = 'Countries', fontsize = 9)
ax.set_ylabel(ylabel = 'Weight in lbs', fontsize = 9)
ax.set_title(label = 'Distribution of Weight of players from different countries', fontsize = 20)
plt.show()```

Finding the popular clubs around the globe

`data['Club'].value_counts().head(10)`
```#Output
No Club                241
Liverpool               33
TSG 1899 Hoffenheim     33
Burnley                 33
Arsenal                 33
Southampton             33
Frosinone               33
Empoli                  33
Fortuna Düsseldorf      33
Name: Club, dtype: int64```
```some_clubs = ('CD LeganÃ©s', 'Southampton', 'RC Celta', 'Empoli', 'Fortuna DÃ¼sseldorf', 'Manchestar City',
'Tottenham Hotspur', 'FC Barcelona', 'Valencia CF', 'Chelsea', 'Real Madrid')

data_clubs = data.loc[data['Club'].isin(some_clubs) & data['Overall']]

plt.rcParams['figure.figsize'] = (15, 8)
ax = sns.boxplot(x = data_clubs['Club'], y = data_clubs['Overall'], palette = 'inferno')
ax.set_xlabel(xlabel = 'Some Popular Clubs', fontsize = 9)
ax.set_ylabel(ylabel = 'Overall Score', fontsize = 9)
ax.set_title(label = 'Distribution of Overall Score in Different popular Clubs', fontsize = 20)
plt.xticks(rotation = 90)
plt.show()```

Distribution of Wages in some Popular clubs

```some_clubs = ('CD LeganÃ©s', 'Southampton', 'RC Celta', 'Empoli', 'Fortuna DÃ¼sseldorf', 'Manchestar City',
'Tottenham Hotspur', 'FC Barcelona', 'Valencia CF', 'Chelsea', 'Real Madrid')

data_club = data.loc[data['Club'].isin(some_clubs) & data['Wage']]

plt.rcParams['figure.figsize'] = (16, 8)
ax = sns.boxplot(x = 'Club', y = 'Wage', data = data_club, palette = 'Reds')
ax.set_xlabel(xlabel = 'Names of some popular Clubs', fontsize = 10)
ax.set_ylabel(ylabel = 'Distribution', fontsize = 10)
ax.set_title(label = 'Disstribution of Wages in some Popular Clubs', fontsize = 20)
plt.xticks(rotation = 90)
plt.show()```

Comparing the performance of left-footed and right-footed footballers

```# ballcontrol vs dribbing

sns.lmplot(x = 'BallControl', y = 'Dribbling', data = data, col = 'Preferred Foot')
plt.show()```