IMDb Top 1000 with PlotAPI Heat Map Data (Pre-IMDb Redesign)

import requests
import lxml.html
import itertools
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd

               
# Exploratory cell: download the first results page of the IMDb Top 1000
# listing and extract every field from one entry to check the XPath queries.
url = "https://www.imdb.com/search/title/?groups=top_1000&sort=user_rating,desc&count=200&start=1&ref_=adv_nxt"
response = requests.get(url, timeout=30)  # do not wait forever on a stalled connection
response.raise_for_status()  # fail loudly rather than parse an HTTP error page
content = response.content
html = lxml.html.fromstring(content)
items = html.xpath("//div[contains(@class, 'lister-item mode-advanced')]")

# The second entry on the page is used as the sample.
item = items[1]

name = item.xpath('.//h3[@class="lister-item-header"]//a/text()')[0]
img = item.xpath('.//div[@class="lister-item-image float-left"]//a//img/@loadlate')[0]
rating = item.xpath('.//div[@class="inline-block ratings-imdb-rating"]//strong/text()')[0]
genre = item.xpath('.//span[@class="genre"]//text()')[0].strip()
runtime = item.xpath('.//span[@class="runtime"]//text()')[0].strip()
certificate = item.xpath('.//span[@class="certificate"]//text()')[0].strip()
metascore = item.xpath('.//div[@class="inline-block ratings-metascore"]//span/text()')[0].strip()
url = "https://www.imdb.com" + item.xpath('.//h3[@class="lister-item-header"]//a/@href')[0]

# The votes/gross paragraph carries two data-value attributes. To agree with
# the full scraping loop later in this file, index 0 is read as the vote count
# and index 1 as the gross (`eww`); the two were previously swapped here.
votes_and_gross = item.xpath('.//p[@class="sort-num_votes-visible"]//span/@data-value')
votes = votes_and_gross[0].strip()
eww = votes_and_gross[1].strip()

# Notebook-style display of the extracted record.
[name, img, rating, genre, runtime, certificate, metascore, eww, votes, url]

import requests
import lxml.html
import itertools
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd

                    
# Download the IMDb Top 1000 listing in five pages of 200 titles each and
# collect the <div> element of every listed title into `items`.
items = []
for start in range(1, 1000, 200):
    url = f"https://www.imdb.com/search/title/?groups=top_1000&sort=user_rating,desc&count=200&start={start}&ref_=adv_nxt"
    response = requests.get(url, timeout=30)  # do not wait forever on a stalled connection
    # Stop on an HTTP error instead of silently parsing an error page, which
    # would otherwise just contribute zero titles to the result.
    response.raise_for_status()
    content = response.content
    html = lxml.html.fromstring(content)
    items.extend(html.xpath("//div[contains(@class, 'lister-item mode-advanced')]"))
    
def _nth_or_na(values, index=0):
    """Return ``values[index]`` stripped of surrounding whitespace, or "N/A" if absent."""
    try:
        return values[index].strip()
    except IndexError:
        return "N/A"


# Column order: the five columns the frame was originally declared with,
# followed by the extra keys in the order each row supplies them.
columns = ['name', 'img', 'rating', 'genre', 'gross',
           'runtime', 'certificate', 'metascore', 'votes', 'url']

# Rows are gathered in a plain list and turned into a DataFrame once at the
# end; DataFrame.append copied the whole frame on every call and no longer
# exists in pandas 2.x.
rows = []
for item in items:
    # Required fields: indexing [0] raises IndexError if the element is missing.
    name = item.xpath('.//h3[@class="lister-item-header"]//a/text()')[0]
    img = item.xpath('.//div[@class="lister-item-image float-left"]//a//img/@loadlate')[0]
    rating = item.xpath('.//div[@class="inline-block ratings-imdb-rating"]//strong/text()')[0]
    genre = item.xpath('.//span[@class="genre"]//text()')[0].strip()
    runtime = item.xpath('.//span[@class="runtime"]//text()')[0].strip()
    url = "https://www.imdb.com" + item.xpath('.//h3[@class="lister-item-header"]//a/@href')[0]

    # Optional fields: fall back to "N/A" when the element is not present.
    certificate = _nth_or_na(item.xpath('.//span[@class="certificate"]//text()'))
    metascore = _nth_or_na(item.xpath('.//div[@class="inline-block ratings-metascore"]//span/text()'))

    # The votes/gross paragraph carries up to two data-value attributes:
    # index 0 is stored as votes, index 1 as gross.
    votes_and_gross = item.xpath('.//p[@class="sort-num_votes-visible"]//span/@data-value')
    votes = _nth_or_na(votes_and_gross, 0)
    eww = _nth_or_na(votes_and_gross, 1)

    rows.append({'name': name, 'img': img, 'rating': rating, 'genre': genre, 'gross': eww,
                 'runtime': runtime, 'certificate': certificate, 'metascore': metascore,
                 'votes': votes, 'url': url})

data = pd.DataFrame(rows, columns=columns)

# Replace each comma-separated genre string with a list of its parts.
data['genre'] = data['genre'].str.split(",")

# `raw_data` is a second name for the same DataFrame object (no copy is made),
# so the list-valued genre column is what gets previewed and written out below.
raw_data = data

# Notebook-style preview of the first rows.
raw_data.head()

raw_data.to_csv(path_or_buf="IMDb_top_1000_August.csv")

Strip out the whitespace around each genre name.

# Remove the spaces left around each genre name by the comma split. The column
# is reassigned as a whole because writing to the rows yielded by iterrows()
# is not guaranteed to reach the underlying DataFrame.
data['genre'] = data['genre'].apply(
    lambda genre_list: [genre_name.strip(' ') for genre_name in genre_list]
)

# Alphabetically sorted list of the distinct genre names across all titles.
genres = sorted({genre_name for genre_list in data.genre for genre_name in genre_list})

# Palette used for the genre badges; the entry at position i belongs to the
# genre at position i of the sorted `genres` list.
# NOTE(review): "#a9a9a9" occurs twice in this palette.
colors = ["#e6194B", "#3cb44b", "#FFDE0A", "#4363d8", "#f58231", "#911eb4",
          "#42d4f4", "#f032e6", "#bfef45", "#fabebe", "#469990", "#e6beff",
          "#9A6324", "#FFEB0A", "#800000", "#00F549", "#a9a9a9", "#FFB870",
          "#000075", "#a9a9a9", "#c6d94B"]


def bg_color(genre):
    """Return the palette colour assigned to *genre*.

    The colour is chosen by the position of *genre* in the module-level
    ``genres`` list. When there are more genres than palette entries the
    palette wraps around instead of raising ``IndexError``.

    Raises:
        ValueError: if *genre* is not present in ``genres``.
    """
    return colors[genres.index(genre) % len(colors)]

# Square genre-by-genre table with every cell starting at zero; both the row
# labels and the column labels are the genre names.
df = pd.DataFrame(data=0, index=genres, columns=genres)

# Notebook-style display of the empty table.
df

# Fill the genre co-occurrence matrix `df` and, in the same pass, build the
# CSV text of the data table (one line per title and genre pair).
#
# Table lines are gathered in a list and joined once at the end. Each line
# keeps its leading "\n", so the joined text equals the header followed by
# every line in order.
#
# In the literal parts of the f-strings a doubled quote ("") is the CSV escape
# for one quote inside a quoted field.
# NOTE(review): inside the {...} expressions the doubled quotes are adjacent
# Python string literals, which simply concatenate, so the genre <span> style
# attribute is emitted without surrounding quotes. Left as-is so the generated
# table text does not change.
table_lines = ['Genre_1,Genre_2,,Name,Genres,Rating,Gross']

for index, row in data.iterrows():
    if len(row.genre) == 1:
        # A single-genre title counts on the diagonal. df.at updates the cell
        # directly; chained df[col][row] assignment is not guaranteed to
        # write back to df.
        df.at[row.genre[0], row.genre[0]] += 1
        table_lines.append(f'\n{row.genre[0]},{row.genre[0]},"<img src=""{row.img}"">","<a style=""color:black;"" href=""{row["url"]}"">{row["name"]}</a>","{(", ".join(["<span style=""color:white;background-color:"+bg_color(x)+";display:inline-block;border-radius:10px;padding:5px 7px;color:white;background-color:#444"">"+x+"</span>" for x in row.genre]))}","<span style=""display:inline-block;border-radius:10px;padding:5px 7px;color:white;background-color:#444"">{row.rating}</span>","{row.gross}"')
    else:
        # Every unordered pair of the title's genres counts once in each
        # direction, which keeps the matrix symmetric.
        for first, second in itertools.combinations(row.genre, 2):
            df.at[second, first] += 1
            df.at[first, second] += 1
            table_lines.append(f'\n{first},{second},"<img src=""{row.img}"">","<a style=""color:black;"" href=""{row["url"]}"">{row["name"]}</a>","{("<br>".join(["<span style=""margin-bottom:2px;color:white;background-color:"+bg_color(x)+";display:inline-block;border-radius:10px;padding:5px 7px;color:white;background-color:#444"">"+x+"</span>" for x in row.genre]))}","<span style=""display:inline-block;border-radius:10px;padding:5px 7px;color:white;background-color:#444"">{row.rating}</span>","{row.gross}"')

data_table = "".join(table_lines)

# Notebook-style preview: builds the <br>-joined genre badge markup for `row`,
# which at this point is the last row visited by the loop above. The value is
# not assigned to anything.
f'\n{"<br>".join(["<span style=""background-color:"+bg_color(x)+";display:inline-block;border-radius:10px;padding:5px 7px;color:white;background-color:#444"">"+x+"</span>" for x in row.genre])}'

from plotapi import Chord
import json

from plotapi import Chord
  
import os

# NOTE(review): this PlotAPI key is committed in source. It should be rotated
# and supplied through the PLOTAPI_API_KEY environment variable; the literal
# is kept only as a fallback so existing runs keep working.
Chord.api_key(os.environ.get("PLOTAPI_API_KEY", "5dc347fb-d747-474f-93e8-4e1a9f5d41dd"))

# Visualization code here

# Render the chord diagram from the co-occurrence matrix, with the per-title
# data table attached.
Chord(
    df.values.tolist(),
    genres,
    colors="movies",
    data_table=data_table,
    data_table_show_indices=False,
    data_table_unique_column="Name",
    compress=True,
    data_table_column_width=90,
    noun="titles",
    padding=0.03,
    animated_intro=True,
).show()

import json

# Bundle the matrix, the genre labels and the data-table text for export.
# This rebinds the name `data`, which held the titles DataFrame until now.
data = dict(
    matrix=df.values.tolist(),
    names=genres,
    data_table=data_table,
)

with open("imdb_top_1000.json", mode="w") as out_file:
    json.dump(data, out_file)

# Notebook-style display of the filled matrix and the genre labels.
df

genres

IMDb Top 1000 with PlotAPI Heat Map Data (Pre-IMDb Redesign)

Pokemon Types with Chord

Animal Crossing Villager Species and Personality

IMDb Top 1000 with Chord

Desktop Browsers Market Share with Pie Fight

Pokemon Trends with Bar Fight

Degree Classification by Graduate Gender with Terminus

Degree Classification by Graduate Ethnicity with Terminus

Global Email Spam with Terminus

Apple 2021 Q3 Results with Sankey

Apple 2021 Q4 Results with Sankey

League of Legends Classes

Pokemon Types with Heat Map

Video Game Publishers and Genres with SplitChord

Top Olympic Medal Earning Countries

League of Legends World Championship

Animal Crossing Villager Style

IMDb Top 1000 with Heat Map

StamiStudios Panels and Colours