
Founded in 1904 to provide unity among national soccer associations, the Fédération Internationale de Football Association (FIFA) boasts 209 members, a membership that rivals that of the United Nations, and is arguably the most prestigious sports organization in the world.
In this data science project we will analyze the player records of the FIFA 2019 dataset with Python.
Let's start by importing the libraries:
import numpy as np import pandas as pd # for visualizations import matplotlib.pyplot as plt import seaborn as sns sns.set()
You can download the data set we need for this task from here:
# Load the player dataset from the CSV file in the working directory
# and print its (rows, columns) shape.
data = pd.read_csv(filepath_or_buffer='data.csv')
print(data.shape)
#Output- (18207, 89)
To check the first 5 rows of the data:
# Preview the first rows of the dataset (head() defaults to five rows).
data.head()

Let’s Keep an Eye on Indian Footballers
def country(x):
    """Return name, overall, potential and position of players whose Nationality equals ``x``.

    Reads the module-level ``data`` DataFrame; the result has one row per
    matching player and is empty when no player has that nationality.
    """
    from_country = data['Nationality'] == x
    return data.loc[from_country, ['Name', 'Overall', 'Potential', 'Position']]


# let's check the Indian Players
country('India')

Analyzing Club Data (Manchester United)
def club(x):
    """Return the squad table (identity, rating and contract columns) for the club named ``x``.

    Reads the module-level ``data`` DataFrame; the result is empty when no
    player's Club equals ``x``.
    """
    squad_columns = ['Name', 'Jersey Number', 'Position', 'Overall', 'Nationality',
                     'Age', 'Wage', 'Value', 'Contract Valid Until']
    plays_for_club = data['Club'] == x
    return data.loc[plays_for_club, squad_columns]


club('Manchester United')

# Keep the Manchester United squad table and inspect its (rows, columns) shape.
x = club(x='Manchester United')
x.shape
#Output– (33, 9)
Describing the data
# Summary statistics for the dataset (by default describe() reports on the numeric columns).
data.describe()

Filling the missing values (column means for the continuous skill ratings, fixed defaults for the other columns) for proper data visualization
# Fill missing values column by column, then zero-fill whatever is left.
#
# A single DataFrame-level fillna with a per-column mapping replaces the
# original chain of ``data[col].fillna(..., inplace=True)`` calls: filling a
# selected column in place is chained assignment, which recent pandas warns
# about and which does not update ``data`` under copy-on-write.
#
# 'FKAccuracy' now uses its column mean like the other skill ratings; it used
# to be filled with the column itself, which filled nothing and left its gaps
# to the final zero-fill.
data.fillna(
    value={
        # continuous skill ratings -> column mean
        'ShortPassing': data['ShortPassing'].mean(),
        'Volleys': data['Volleys'].mean(),
        'Dribbling': data['Dribbling'].mean(),
        'Curve': data['Curve'].mean(),
        'FKAccuracy': data['FKAccuracy'].mean(),
        'LongPassing': data['LongPassing'].mean(),
        'BallControl': data['BallControl'].mean(),
        'HeadingAccuracy': data['HeadingAccuracy'].mean(),
        'Finishing': data['Finishing'].mean(),
        'Crossing': data['Crossing'].mean(),
        # remaining columns -> fixed default values
        'Weight': '200lbs',
        'Contract Valid Until': 2019,
        'Height': "5'11",
        'Loaned From': 'None',
        'Joined': 'Jul 1, 2018',
        'Jersey Number': 8,
        'Body Type': 'Normal',
        'Position': 'ST',
        'Club': 'No Club',
        'Work Rate': 'Medium/ Medium',
        'Skill Moves': data['Skill Moves'].median(),
        'Weak Foot': 3,
        'Preferred Foot': 'Right',
        'International Reputation': 1,
        'Wage': '€200K',
    },
    inplace=True,
)
# Any column not listed above gets 0 for its missing entries.
data.fillna(0, inplace=True)
def _category_score(data, columns):
    """Average the selected ``columns`` of ``data`` and round to the nearest int.

    ``data`` may be a single player row (Series) or a DataFrame; in the latter
    case the per-column means are averaged.
    """
    selected = data[list(columns)]
    return int(round(selected.mean().mean()))


def defending(data):
    """Return the rounded mean of the marking and tackling ratings."""
    return _category_score(data, ('Marking', 'StandingTackle', 'SlidingTackle'))


def general(data):
    """Return the rounded mean of heading accuracy, dribbling, curve and ball control."""
    return _category_score(data, ('HeadingAccuracy', 'Dribbling', 'Curve', 'BallControl'))


def mental(data):
    """Return the rounded mean of aggression, interceptions, positioning, vision and composure."""
    return _category_score(
        data, ('Aggression', 'Interceptions', 'Positioning', 'Vision', 'Composure'))


def passing(data):
    """Return the rounded mean of crossing, short passing and long passing."""
    return _category_score(data, ('Crossing', 'ShortPassing', 'LongPassing'))


def mobility(data):
    """Return the rounded mean of acceleration, sprint speed, agility and reactions."""
    return _category_score(data, ('Acceleration', 'SprintSpeed', 'Agility', 'Reactions'))


def power(data):
    """Return the rounded mean of balance, jumping, stamina and strength."""
    return _category_score(data, ('Balance', 'Jumping', 'Stamina', 'Strength'))


def rating(data):
    """Return the rounded mean of the potential and overall ratings."""
    return _category_score(data, ('Potential', 'Overall'))


def shooting(data):
    """Return the rounded mean of the finishing, volley, free-kick, power, long-shot and penalty ratings."""
    return _category_score(
        data, ('Finishing', 'Volleys', 'FKAccuracy', 'ShotPower', 'LongShots', 'Penalties'))
Renaming the 'Club Logo' column and adding the aggregated category scores as new columns
# Give the logo column an identifier-friendly name.
data.rename(columns={'Club Logo': 'Club_Logo'}, inplace=True)

# adding these categories to the data: each scorer is evaluated once per
# player row and its result stored in a new column
data['Defending'] = data.apply(defending, axis='columns')
data['General'] = data.apply(general, axis='columns')
data['Mental'] = data.apply(mental, axis='columns')
data['Passing'] = data.apply(passing, axis='columns')
data['Mobility'] = data.apply(mobility, axis='columns')
data['Power'] = data.apply(power, axis='columns')
data['Rating'] = data.apply(rating, axis='columns')
data['Shooting'] = data.apply(shooting, axis='columns')
# Compact per-player table: the aggregated category scores plus the
# identity columns (flag, age, nationality, photo, club).
players = data.loc[:, [
    'Name',
    'Defending', 'General', 'Mental', 'Passing',
    'Mobility', 'Power', 'Rating', 'Shooting',
    'Flag', 'Age', 'Nationality', 'Photo', 'Club_Logo', 'Club',
]]
players.head()

Data Visualization
Comparison of preferred foot over the different players
# Bar chart of how many players prefer each foot.
plt.rcParams['figure.figsize'] = (10, 5)
# Name the column through ``x=``/``data=`` instead of passing the Series
# positionally: recent seaborn releases accept only ``data`` as a positional
# argument, so the positional Series is no longer read as the x variable.
sns.countplot(x='Preferred Foot', data=data, palette='pink')
plt.title('Most Preferred Foot of the Players', fontsize=20)
plt.show()

Plotting a pie chart to represent share of international reputation
# Pie chart of the share of players at each international reputation level.
#
# value_counts() orders its result by frequency, so the wedges are sorted by
# reputation level and the labels are derived from that same index; a
# hard-coded '1'..'5' list would mislabel wedges whenever the frequency order
# differs from the level order.
sizes = data['International Reputation'].value_counts().sort_index()
labels = [str(int(level)) for level in sizes.index]
colors = plt.cm.copper(np.linspace(0, 1, len(sizes)))
# Wedge offsets for levels 1-5, trimmed to the number of levels actually present.
explode = [0.1, 0.1, 0.2, 0.5, 0.9][:len(sizes)]

plt.rcParams['figure.figsize'] = (9, 9)
plt.pie(sizes, labels=labels, colors=colors, explode=explode, shadow=True)
plt.title('International Reputation for the Football Players', fontsize=20)
plt.legend()
plt.show()

Different positions acquired by the players
# Bar chart of the number of players in each playing position.
plt.figure(figsize=(18, 8))
plt.style.use('fivethirtyeight')
# The column is passed as ``x=``: a positional first argument is taken as
# ``data`` by recent seaborn releases, which then clashes with ``data=data``.
ax = sns.countplot(x='Position', data=data, palette='bone')
ax.set_xlabel(xlabel='Different Positions in Football', fontsize=16)
ax.set_ylabel(ylabel='Count of Players', fontsize=16)
ax.set_title(label='Comparison of Positions and Players', fontsize=20)
plt.show()

Defining a function for cleaning the Weight data
def extract_value_from(value):
    """Convert a weight string such as '159lbs' to a float by dropping the 'lbs' unit.

    NOTE: a later cell defines another function with this same name for
    currency strings, which replaces this one from that point on.
    """
    return float(value.replace('lbs', ''))


# applying the function to weight column
data['Weight'] = data['Weight'].apply(extract_value_from)
data['Weight'].head()
#Output 0 159.0 1 183.0 2 150.0 3 168.0 4 154.0 Name: Weight, dtype: float64
Defining a function for cleaning the wage column
def extract_value_from(Value):
    """Convert a currency string such as '€565K' or '€110.5M' to a float amount.

    Parameters
    ----------
    Value : str or number
        Text made of an optional '€' sign, a number and an optional 'K'
        (thousands) or 'M' (millions) suffix. Plain numbers are accepted too.

    Returns
    -------
    float
        The amount with the suffix multiplier applied.

    Raises
    ------
    ValueError
        If the text left after removing the '€' sign and suffix is not a number.
    """
    # Cells that are already numeric (e.g. the 0 written by a blanket
    # ``fillna(0)``) have no symbol or suffix to strip.
    if not isinstance(Value, str):
        return float(Value)

    amount = Value.replace('€', '').strip()
    # Check the suffix on the cleaned text, and only at the end of it.
    if amount.endswith('M'):
        return float(amount[:-1]) * 1000000
    if amount.endswith('K'):
        return float(amount[:-1]) * 1000
    return float(amount)
Applying the function to the Value and Wage columns
# Turn the currency strings of both money columns into plain floats,
# then preview the converted wages.
data['Value'] = data['Value'].apply(extract_value_from)
data['Wage'] = data['Wage'].apply(extract_value_from)
data['Wage'].head()
#Output 0 565000.0 1 405000.0 2 290000.0 3 260000.0 4 355000.0 Name: Wage, dtype: float64
Comparing the players’ Wages
import warnings

# Suppress all warnings from here on (the filter stays active for later cells too).
warnings.filterwarnings('ignore')

# Distribution of player wages.
plt.rcParams['figure.figsize'] = (15, 5)
sns.distplot(a=data['Wage'], color='blue')
plt.title('Distribution of Wages of Players', fontsize=20)
plt.xlabel('Wage Range for Players', fontsize=16)
plt.ylabel('Count of the Players', fontsize=16)
# Rotate the tick labels so the large wage values do not overlap.
plt.xticks(rotation=90)
plt.show()

Skill Moves of Players
# Bar chart of how many players have each skill-moves value.
plt.figure(figsize=(10, 8))
ax = sns.countplot(data=data, x='Skill Moves', palette='pastel')
ax.set_xlabel('Number of Skill Moves', fontsize=16)
ax.set_ylabel('Count', fontsize=16)
ax.set_title('Count of players on Basis of their skill moves', fontsize=20)
plt.show()

Height of Players
# Bar chart of how many players have each height value.
plt.figure(figsize=(13, 8))
ax = sns.countplot(data=data, x='Height', palette='dark')
ax.set_xlabel('Height in Foot per inch', fontsize=16)
ax.set_ylabel('Count', fontsize=16)
ax.set_title('Count of players on Basis of Height', fontsize=20)
plt.show()

To show the distribution of body weights of the players in FIFA 2019
# Distribution of player weights (the Weight column holds numeric pounds
# after the 'lbs' suffix has been stripped).
plt.figure(figsize=(20, 5))
sns.distplot(data['Weight'], color='pink')
plt.title('Different Weights of the Players Participating in FIFA 2019', fontsize=20)
# The x axis shows weights; the label previously said "Heights".
plt.xlabel('Weights associated with the players', fontsize=16)
plt.ylabel('count of Players', fontsize=16)
plt.show()

To show Different Work rate of the players participating in the FIFA 2019
# Bar chart of how many players have each work-rate combination.
plt.figure(figsize=(15, 7))
sns.countplot(data=data, x='Work Rate', palette='hls')
plt.xlabel('Work rates associated with the players', fontsize=16)
plt.ylabel('count of Players', fontsize=16)
plt.title('Different work rates of the Players Participating in the FIFA 2019', fontsize=20)
plt.show()

To show Different Speciality Score of the players participating in the FIFA 2019
# Histogram (58 bins, no density curve) of the players' Special scores.
x = data['Special']
plt.figure(figsize=(12, 8))
plt.style.use('tableau-colorblind10')
ax = sns.distplot(a=x, bins=58, kde=False, color='m')
ax.set_title('Histogram for the Speciality Scores of the Players', fontsize=20)
ax.set_xlabel('Special score range', fontsize=16)
ax.set_ylabel('Count of the Players', fontsize=16)
plt.show()

To show Different potential scores of the players participating in the FIFA 2019
# Histogram (58 bins, no density curve) of the players' Potential scores.
x = data.Potential
plt.figure(figsize=(12, 8))
# Matplotlib 3.6 renamed its bundled seaborn styles with a 'seaborn-v0_8-'
# prefix and the old names were removed afterwards; use whichever name this
# installation provides so the cell runs on both old and new versions.
plt.style.use(
    'seaborn-v0_8-paper' if 'seaborn-v0_8-paper' in plt.style.available else 'seaborn-paper'
)
ax = sns.distplot(x, bins=58, kde=False, color='y')
ax.set_xlabel(xlabel="Player's Potential Scores", fontsize=16)
ax.set_ylabel(ylabel='Number of players', fontsize=16)
ax.set_title(label='Histogram of players Potential Scores', fontsize=20)
plt.show()

To show Different nations participating in the FIFA 2019
# Bar chart of player counts for the 80 most represented nationalities.
plt.style.use('dark_background')
players_per_nation = data['Nationality'].value_counts().head(80)
players_per_nation.plot(kind='bar', color='orange', figsize=(20, 7))
plt.title('Different Nations Participating in FIFA 2019', fontsize=30, fontweight=20)
plt.xlabel('Name of The Country')
plt.ylabel('count')
plt.show()

Countries with Most Players
Picking up the countries with highest number of players to compare their overall scores
# The eight most common nationalities together with their player counts.
data['Nationality'].value_counts().head(8)
#Output- England 1662 Germany 1198 Spain 1072 Argentina 937 France 914 Brazil 827 Italy 702 Colombia 618 Name: Nationality, dtype: int64
Distribution of Players’ Weights in Each of These Nations
# Violin plot of player weight for the eight nations with the most players.
# 'Colombia' is spelled as it appears in the Nationality column; the earlier
# spelling 'Columbia' matched no rows, so that nation was silently missing.
some_countries = ('England', 'Germany', 'Spain', 'Argentina', 'France', 'Brazil', 'Italy', 'Colombia')
# Combine the nationality filter with an explicit boolean test on Weight:
# a raw numeric column is not a valid row mask for ``&``.
data_countries = data.loc[data['Nationality'].isin(some_countries) & (data['Weight'] > 0)]

plt.rcParams['figure.figsize'] = (15, 7)
ax = sns.violinplot(x=data_countries['Nationality'], y=data_countries['Weight'], palette='Reds')
ax.set_xlabel(xlabel='Countries', fontsize=9)
ax.set_ylabel(ylabel='Weight in lbs', fontsize=9)
ax.set_title(label='Distribution of Weight of players from different countries', fontsize=20)
plt.show()

Finding the popular clubs around the globe
# The ten most frequent Club values with their player counts
# ('No Club' is the placeholder written for players with a missing club).
data['Club'].value_counts().head(10)
#Output No Club 241 Liverpool 33 TSG 1899 Hoffenheim 33 Burnley 33 CD Leganés 33 Arsenal 33 Southampton 33 Frosinone 33 Empoli 33 Fortuna Düsseldorf 33 Name: Club, dtype: int64
# Box plot of the Overall rating for a hand-picked set of clubs.
# 'Manchester City' was previously misspelled 'Manchestar City', which
# matched no rows.
some_clubs = ('CD Leganés', 'Southampton', 'RC Celta', 'Empoli', 'Fortuna Düsseldorf',
              'Manchester City', 'Tottenham Hotspur', 'FC Barcelona', 'Valencia CF',
              'Chelsea', 'Real Madrid')
# Use an explicit boolean test on Overall. ``mask & data['Overall']`` is a
# bitwise AND against an integer column, which is not a "has a rating" check
# and can drop valid players depending on the rating's bits.
data_clubs = data.loc[data['Club'].isin(some_clubs) & (data['Overall'] > 0)]

plt.rcParams['figure.figsize'] = (15, 8)
ax = sns.boxplot(x=data_clubs['Club'], y=data_clubs['Overall'], palette='inferno')
ax.set_xlabel(xlabel='Some Popular Clubs', fontsize=9)
ax.set_ylabel(ylabel='Overall Score', fontsize=9)
ax.set_title(label='Distribution of Overall Score in Different popular Clubs', fontsize=20)
plt.xticks(rotation=90)
plt.show()

Distribution of Wages in some Popular clubs
# Box plot of player wages for a hand-picked set of clubs.
# 'Manchester City' was previously misspelled 'Manchestar City', which
# matched no rows.
some_clubs = ('CD Leganés', 'Southampton', 'RC Celta', 'Empoli', 'Fortuna Düsseldorf',
              'Manchester City', 'Tottenham Hotspur', 'FC Barcelona', 'Valencia CF',
              'Chelsea', 'Real Madrid')
# Keep players of those clubs that have a positive wage, stated as an
# explicit comparison rather than by using the numeric Wage column as a mask.
data_club = data.loc[data['Club'].isin(some_clubs) & (data['Wage'] > 0)]

plt.rcParams['figure.figsize'] = (16, 8)
ax = sns.boxplot(x='Club', y='Wage', data=data_club, palette='Reds')
ax.set_xlabel(xlabel='Names of some popular Clubs', fontsize=10)
ax.set_ylabel(ylabel='Distribution', fontsize=10)
ax.set_title(label='Distribution of Wages in some Popular Clubs', fontsize=20)
plt.xticks(rotation=90)
plt.show()

Comparing the performance of left-footed and right-footed footballers
# ballcontrol vs dribbling: one regression panel per preferred foot
sns.lmplot(data=data, x='BallControl', y='Dribbling', col='Preferred Foot')
plt.show()
