Wilco Discography Analysis¶

Prepared by Karl Duckett - April 2021

Image source: https://www.nonesuch.com/journal/cbs-sunday-morning-wilco-finally-getting-the-respect-they-deserve-2009-08-24

If you don't know who Wilco are, have a listen on Spotify https://open.spotify.com/artist/2QoU3awHVdcHS8LrZEKvSM

Imports & Setup¶

# Not all of these are used in this report, but it's my standard copy and paste for each Jupyter Notebook developed.
import pandas as pd
import numpy as np
import seaborn as sns
import os
import re
import matplotlib.pyplot as plt
from IPython.display import display, HTML
import plotly.offline as py
import plotly.graph_objs as go
import plotly.express as px

from numbers import Number
from tabulate import tabulate
from scipy import stats
import datetime

from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

# Use Plotly as the plotting backend for DataFrame.plot() calls.
pd.options.plotting.backend = "plotly"
# Display up to 100 columns and do not truncate the text inside a cell.
pd.options.display.max_columns = 100
pd.options.display.max_colwidth = None
# Initialise Plotly's offline notebook mode so figures render inline.
py.init_notebook_mode(connected=True)

# Import the data - this data was extracted from Wikipedia + the Lyric Genius website
df = pd.read_csv('Wilco.csv')

Clean Up¶

Create two new columns: the track duration converted from minutes:seconds to seconds, and the word count of the lyrics.

def time_convert(x):
    """Convert a colon-separated duration string to a whole number of seconds.

    Accepts ``"SS"``, ``"M:SS"`` and ``"H:MM:SS"``. Each field is parsed as an
    integer; reading from the right, the fields are seconds, minutes, hours.

    Args:
        x: duration string such as ``"2:59"``.

    Returns:
        The duration in seconds as an ``int``.

    Raises:
        ValueError: if a field is not an integer, or there are more than
            three fields.
    """
    parts = [int(field) for field in x.split(':')]
    if len(parts) > 3:
        raise ValueError(f"unsupported duration format: {x!r}")

    # Fold left to right: every further field means the running total was
    # expressed in the next-larger unit, i.e. worth 60 times as much.
    seconds = 0
    for part in parts:
        seconds = seconds * 60 + part
    return seconds

# Derive numeric features from the raw text columns.
df['Seconds'] = df['Duration'].map(time_convert)      # duration string -> seconds
df['WordCount'] = df['Lyrics'].str.split().str.len()  # whitespace-separated tokens

df.head(2)

Create Elements¶

Word Clouds¶

# Concatenate every song's lyrics into one space-separated corpus.
text = " ".join(df['Lyrics'])

# Library stop-words plus a few extra words to exclude from the cloud.
stopwords = set(STOPWORDS) | {"know", "go", "want", "will", "see"}

import random
def grey_color_func(word, font_size, position, orientation, random_state=None,
                    **kwargs):
    """Return a random light-grey colour as an ``hsl(...)`` string.

    Hue and saturation are fixed at 0; lightness is a random integer from
    60 to 100 inclusive, drawn from the global ``random`` module. All
    arguments are accepted but ignored.
    NOTE(review): the signature looks like a WordCloud ``color_func``
    callback, but nothing in the visible code passes it to one - confirm.
    """
    lightness = random.randint(60, 100)
    return "hsl(0, 0%%, %d%%)" % lightness

# % time will return how long it took to execute this line (only line!)
%time wordcloud = WordCloud(stopwords=stopwords, background_color="white", width=1600,height=900).generate(text)
plt.figure(figsize = (32,18))
plt.imshow(wordcloud.recolor(random_state=3),
           interpolation="bilinear")
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

Wall time: 2.53 s

Word cloud of all song lyrics

Word Count and Duration¶

# Every whitespace-separated token from all lyrics, upper-cased so that
# counting ignores case.
text = [token.upper() for token in " ".join(df['Lyrics']).split()]

def wordListToFreqDict(text):
    """Map each item of *text* to the number of times it occurs.

    Args:
        text: iterable of hashable items (here, a list of word strings).

    Returns:
        A plain ``dict`` of item -> count, keyed in order of first appearance.
    """
    from collections import Counter

    # One pass over the input; calling text.count() once per element would
    # rescan the whole list every time (quadratic in the number of words).
    return dict(Counter(text))

# Occurrence count for each distinct upper-cased token across all lyrics.
word_dict = wordListToFreqDict(text)

def sortFreqDict(word_dict):
    """Return ``(count, word)`` pairs ordered from highest to lowest.

    Pairs are compared as tuples, so words sharing a count appear in
    descending word order.
    """
    count_word_pairs = ((count, word) for word, count in word_dict.items())
    return sorted(count_word_pairs, reverse=True)

# (count, word) pairs, most frequent first.
total_words = sortFreqDict(word_dict);
# NOTE: total_words holds one entry per distinct upper-cased token, so the
# number printed below is the vocabulary size (unique tokens, punctuation
# included), not the total number of words across all songs.
print('Total words in all Wilco songs: ' + str(len(total_words)))

Total words in all Wilco songs: 3069

# Move Title into the index and straight back out again: afterwards Title is
# the first column and the frame has a fresh default integer index.
df.set_index('Title', inplace=True)
df.reset_index(inplace=True)

# One point per song: duration on x, word count on y, coloured by album,
# with the song title as the hover label.
fig = px.scatter(df, x="Seconds", y="WordCount", color="Album", hover_name='Title')
fig.show()

from plotly.subplots import make_subplots
# Dual-axis chart with one x position per song: word count as bars on the
# primary y axis, duration as a line on the secondary y axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Bar(
        x=df["Title"],
        y=df["WordCount"],
        name='Words',
        showlegend=False),
    secondary_y=False
)

fig.add_trace(
    go.Scatter(
        x=df['Title'],
        y=df['Seconds'],
        mode="lines",
        name='Seconds',
        line=go.scatter.Line(color="#ff4f5b"),
        showlegend=False),
    secondary_y=True
)

# No selector is given, so the marker colour is set on every trace.
fig.update_traces(marker_color='#ffcf4d')

fig.update_yaxes(title_text="Word Count", secondary_y=False)
fig.update_yaxes(title_text="Duration (seconds)", secondary_y=True)
# Hide the per-song tick labels on the x axis.
fig.update_xaxes(showticklabels=False)

# Title, its horizontal centring and the figure height are set once, here.
fig.update_layout(
    title_text='Word Count & Duration by Song',
    title_x=0.5,
    height=600)

fig.show()

# Per-album totals of track length (seconds) and word count.
# Only the two numeric measures are selected before summing: summing every
# column would also try to aggregate the text columns (Title, Duration,
# Lyrics), and how those are handled depends on the pandas version.
album_totals = df.groupby('Album')[['Seconds', 'WordCount']].sum()
album_totals

album_totals.plot()

# Dual-axis chart with one x position per album: total word count as bars on
# the primary y axis, total duration as a line on the secondary y axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Bar(
        x=album_totals.index,
        y=album_totals["WordCount"],
        name='Words',
        showlegend=False),
    secondary_y=False
)

fig.add_trace(
    go.Scatter(
        x=album_totals.index,
        y=album_totals['Seconds'],
        mode="lines",
        line=go.scatter.Line(color="gray"),
        showlegend=False,
        name='Seconds'),
    secondary_y=True
)

fig.update_yaxes(title_text="Word Count", secondary_y=False)
fig.update_yaxes(title_text="Duration (seconds)", secondary_y=True)

# All layout settings in one call, with a single album-level title.
fig.update_layout(
    xaxis_title="Album Name",
    title_text='Word Count & Total Duration by Album',
    title_x=0.5,
    height=600,
    hovermode="x unified")

fig.show()

Poster Image¶

By exporting some of the charts generated above in SVG format and applying a little magic in Photoshop, we have a few design options for presenting the data.

There we go! Ready to hang on any Wilco fanatic's wall.

Explore more coding projects and data analysis at www.karlduckett.com. If you would like the large scale version of the poster and alternative designs, use the contact me section and get in touch :)

	Title	Album	Duration	Tempo	Key	Chords	Lyrics	Seconds	WordCount
0	I Must Be High	A.M	2:59	NaN	NaN	NaN	You always wanted more time,\nTo do what you always wanted to do\nNow you got it\n\nAnd I, I must be high,\nTo say goodbye\nBye bye bye\n\nYou never said you needed this\nAnd you're pissed that you missed the very last kiss,\nFrom my lips\n\nAnd I, I must be high,\nTo say goodbye\nBye bye bye\n\nAnd you never looked in my eyes,\nLong enough to find any piece of mind\nBut now you got it\n\nAnd I, I must be high,\nTo let you say goodbye\nBye bye bye	179	94
1	Casino Queen	A.M	2:49	NaN	NaN	NaN	Well the money's pouring down and the people all look down,\nAnd it's floating out of town\nI hit the second deck and I spend my paycheck,\nAnd my wife that I just met, she's looking like a wreck\n\nCasino Queen, my lord you're mean\nI've been gambling like a fiend on your tables so green\n\nI always bet on black, blackjack,\nI'll pay you back\nThe room fills with smoke and I'm already broke,\nAnd the dealer keeps on joking as he takes my last token\n\nCasino Queen, my lord you're mean\nI've been gambling like a fiend on your tables so green\n\nCasino Queen, my lord you're mean\nI've been gambling like a fiend on your tables so green	169	121

	Seconds	WordCount
Album
A Ghost Is Born	4393	2034
A.M	2671	1520
Being There	4616	2635
Ode to Joy	2557	1445
Schmilco	2182	1622
Sky Blue Sky	3056	1703
Star Wars	2027	1400
Summerteeth	3393	2457
The Whole Love	4782	2427
Wilco (The Album)	2831	2157
Yankee Hotel Foxtrot	3094	1967