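usecols=['nconst','primaryName','primaryProfession'],# Load only these columns
dtype={'primaryName':'U','primaryProfession':'U'},# 'U' is NumPy's Unicode-string type code: both columns are read as text (not as unsigned integers)
converters={'nconst':lambdax:int(x.lstrip("nm0"))})# Person IDs carry an "nm" + zero-padding prefix: lstrip removes every leading 'n', 'm' and '0' character and the remaining digits are stored as an int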
# Many actors/actresses hold more than one job (director etc.), so the filter
# matches on substrings instead of exact equality: a row is kept when its
# primaryProfession text contains "actor" or "actress". Both "actor" and
# "actor, director" therefore pass the filter.
df_attori.query(
    'primaryProfession.str.contains("actor") or primaryProfession.str.contains("actress")',
    inplace=True,
)
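usecols=['tconst','primaryTitle','isAdult','titleType'],# Load only these columns
dtype={'primaryTitle':'U','titleType':'U'},# 'U' is NumPy's Unicode-string type code: both columns are read as text (not as unsigned integers)
converters={'tconst':lambdax:int(x.lstrip("t0")),'isAdult':lambdax:x!="0"})# Title IDs carry a "t" + zero-padding prefix: lstrip removes every leading 't' and '0' character and the remaining digits are stored as an int. isAdult becomes a boolean (True unless the field is "0"); adult titles are dropped later by the query on df_film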
# IMDb contains many title types that are not relevant here, so keep only the
# rows that are not flagged as adult and whose titleType is in this whitelist.
# The filter is applied in place on df_film.
df_film.query(
    'not isAdult and titleType in ["movie", "tvSeries", "tvMovie", "tvMiniSeries"]',
    inplace=True,
)