added script for discrepancy in actors graph
parent
4cb8416a37
commit
31b0ed8117
@ -0,0 +1,28 @@
|
|||||||
|
import os
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Numeric suffixes of the input files "top_actors_XX_c.txt" that are compared.
MIN_VALUES = (5, *range(10, 71, 10))


def load_actor_sets(values=MIN_VALUES):
    """Load one set of actors per input file.

    For every ``i`` in *values* the tab-separated file
    ``top_actors_{i:02d}_c.txt`` (looked up in the current working directory)
    is read, keeping only its second column (``usecols=[1]``), and the
    entries of that column are collected into a set.

    Args:
        values: Iterable of integer suffixes selecting the files to read.

    Returns:
        A dict mapping each suffix to the set of entries found in its file,
        in the iteration order of *values*.

    Raises:
        FileNotFoundError: If one of the expected files is missing
            (raised by ``pandas.read_csv``).
    """
    sets = {}
    for i in values:
        frame = pd.read_csv(
            f"top_actors_{i:02d}_c.txt", sep='\t', usecols=[1], names=["actor"]
        )
        sets[i] = set(frame["actor"])
    return sets


def difference_matrix(sets):
    """Return the pairwise relative difference between the given sets.

    Entry ``[r, c]`` is the fraction of the elements of the r-th set that do
    NOT appear in the c-th set, i.e. ``len(S_r - S_c) / len(S_r)``. Rows and
    columns follow the iteration order of *sets*.

    Each row is normalised by the size of its own set, so every value lies in
    ``[0, 1]`` even when the sets have different sizes. A row belonging to an
    empty set is left at 0.0 (nothing can be missing from it), which also
    avoids a division by zero.

    Args:
        sets: Mapping from a label to a ``set``.

    Returns:
        A square ``numpy.ndarray`` of dtype float with one row/column per
        entry of *sets*.
    """
    members = list(sets.values())
    diff = np.zeros((len(members), len(members)), dtype=float)
    for row, current in enumerate(members):
        if not current:
            continue
        for col, other in enumerate(members):
            diff[row, col] = len(current - other) / len(current)
    return diff


def plot_difference_matrix(diff, labels):
    """Show *diff* as an annotated heat-map.

    Args:
        diff: Square 2-D array as returned by :func:`difference_matrix`.
        labels: Integer tick labels, one per row/column of *diff*.
    """
    plt.matshow(diff)
    ax = plt.gca()

    # Write the numeric value in the centre of every cell; note that text()
    # takes (x, y) == (column, row).
    for (row, col), value in np.ndenumerate(diff):
        ax.text(col, row, f'{value:0.2f}', ha='center', va='center')

    ticks = np.arange(len(labels))
    tick_labels = [f"{label:d}" for label in labels]
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    ax.set_xticklabels(tick_labels)
    ax.set_yticklabels(tick_labels)

    # NOTE(review): the axes are labelled MIN_ACTORS while the colour bar
    # mentions MIN_MOVIES -- confirm which parameter the files actually vary.
    plt.ylabel("\nMIN_ACTORS value")
    plt.xlabel("\nMIN_ACTORS value")
    cb = plt.colorbar()
    cb.set_label("\npercentage of difference in results varying MIN_MOVIES")
    plt.show()


def main():
    """Load the actor sets, compute their differences and display the plot."""
    sets = load_actor_sets()
    plot_difference_matrix(difference_matrix(sets), list(sets))


if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue