diff --git a/neos_dataset_preprocessing.ipynb b/neos_dataset_preprocessing.ipynb
new file mode 100644
index 0000000..e9dfb19
--- /dev/null
+++ b/neos_dataset_preprocessing.ipynb
@@ -0,0 +1,1044 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "b97688d5-dbd8-451e-924b-902649ef3712",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import math\n",
+ "import os\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import seaborn as sns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "838b4878-cb24-4330-ac58-48de11ee1372",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " spkid | \n",
+ " pha | \n",
+ " H | \n",
+ " epoch_mjd | \n",
+ " e | \n",
+ " a | \n",
+ " i | \n",
+ " om | \n",
+ " w | \n",
+ " ma | \n",
+ " moid | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 20000433 | \n",
+ " N | \n",
+ " 10.41 | \n",
+ " 60400 | \n",
+ " 0.2227 | \n",
+ " 1.458 | \n",
+ " 10.83 | \n",
+ " 304.28 | \n",
+ " 178.90 | \n",
+ " 334.73 | \n",
+ " 0.1500 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 20000719 | \n",
+ " N | \n",
+ " 15.59 | \n",
+ " 60400 | \n",
+ " 0.5469 | \n",
+ " 2.636 | \n",
+ " 11.58 | \n",
+ " 183.85 | \n",
+ " 156.22 | \n",
+ " 102.37 | \n",
+ " 0.2010 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 20000887 | \n",
+ " N | \n",
+ " 13.88 | \n",
+ " 60400 | \n",
+ " 0.5710 | \n",
+ " 2.472 | \n",
+ " 9.40 | \n",
+ " 110.42 | \n",
+ " 350.48 | \n",
+ " 289.48 | \n",
+ " 0.0803 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 20001036 | \n",
+ " N | \n",
+ " 9.26 | \n",
+ " 60400 | \n",
+ " 0.5328 | \n",
+ " 2.665 | \n",
+ " 26.69 | \n",
+ " 215.50 | \n",
+ " 132.48 | \n",
+ " 321.69 | \n",
+ " 0.3450 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 20001221 | \n",
+ " N | \n",
+ " 17.38 | \n",
+ " 60400 | \n",
+ " 0.4352 | \n",
+ " 1.920 | \n",
+ " 11.88 | \n",
+ " 171.31 | \n",
+ " 26.68 | \n",
+ " 197.64 | \n",
+ " 0.1080 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " spkid pha H epoch_mjd e a i om w \\\n",
+ "0 20000433 N 10.41 60400 0.2227 1.458 10.83 304.28 178.90 \n",
+ "1 20000719 N 15.59 60400 0.5469 2.636 11.58 183.85 156.22 \n",
+ "2 20000887 N 13.88 60400 0.5710 2.472 9.40 110.42 350.48 \n",
+ "3 20001036 N 9.26 60400 0.5328 2.665 26.69 215.50 132.48 \n",
+ "4 20001221 N 17.38 60400 0.4352 1.920 11.88 171.31 26.68 \n",
+ "\n",
+ " ma moid \n",
+ "0 334.73 0.1500 \n",
+ "1 102.37 0.2010 \n",
+ "2 289.48 0.0803 \n",
+ "3 321.69 0.3450 \n",
+ "4 197.64 0.1080 "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Data folder: override it with the NEOS_DATA_DIR environment variable; the fallback\n",
+ "#is the path this notebook was originally run with. The empty last component makes\n",
+ "#os.path.join guarantee that folder ends with a path separator, so any\n",
+ "#folder+filename concatenation keeps working.\n",
+ "folder=os.path.join(os.environ.get('NEOS_DATA_DIR','/home/unipi/v.vichi3/Desktop/'),'')\n",
+ "df=pd.read_csv(os.path.join(folder,'sbdb_query_results.csv'))\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "7f165f80-3253-49b0-b43c-a8722f60e57b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(34901, 11)"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "c41deece-a459-4ef3-8c83-0f87756dac92",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['spkid', 'pha', 'H', 'epoch_mjd', 'e', 'a', 'i', 'om', 'w', 'ma',\n",
+ " 'moid'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "f91e7f7c-4d5a-44c8-b0ba-9c2b71a01839",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 34901 entries, 0 to 34900\n",
+ "Data columns (total 11 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 spkid 34901 non-null int64 \n",
+ " 1 pha 34875 non-null object \n",
+ " 2 H 34897 non-null float64\n",
+ " 3 epoch_mjd 34901 non-null int64 \n",
+ " 4 e 34901 non-null float64\n",
+ " 5 a 34901 non-null float64\n",
+ " 6 i 34901 non-null float64\n",
+ " 7 om 34901 non-null float64\n",
+ " 8 w 34901 non-null float64\n",
+ " 9 ma 34901 non-null float64\n",
+ " 10 moid 34876 non-null float64\n",
+ "dtypes: float64(8), int64(2), object(1)\n",
+ "memory usage: 2.9+ MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "eb543294-e46b-431e-862a-a0244cadd9fc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(34876, 11)"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Drop only the rows whose 'moid' is null and renumber the index from 0.\n",
+ "#Nulls in other columns (e.g. 'H', 'pha') are NOT removed by this step.\n",
+ "df=df.dropna(subset=['moid']).reset_index(drop=True)\n",
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "e4ad2c21-bd10-4714-9d9a-aeb4ed639b36",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " spkid | \n",
+ " H | \n",
+ " epoch_mjd | \n",
+ " e | \n",
+ " a | \n",
+ " i | \n",
+ " om | \n",
+ " w | \n",
+ " ma | \n",
+ " moid | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 3.487600e+04 | \n",
+ " 34872.000000 | \n",
+ " 34876.000000 | \n",
+ " 34876.000000 | \n",
+ " 34876.000000 | \n",
+ " 34876.000000 | \n",
+ " 34876.000000 | \n",
+ " 34876.000000 | \n",
+ " 34876.000000 | \n",
+ " 3.487600e+04 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 2.359880e+07 | \n",
+ " 23.486083 | \n",
+ " 59701.479986 | \n",
+ " 0.437249 | \n",
+ " 1.764192 | \n",
+ " 12.005071 | \n",
+ " 171.924096 | \n",
+ " 182.571561 | \n",
+ " 171.755088 | \n",
+ " 8.552434e-02 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 2.362820e+07 | \n",
+ " 2.894608 | \n",
+ " 1572.136162 | \n",
+ " 0.176989 | \n",
+ " 2.117845 | \n",
+ " 10.694688 | \n",
+ " 103.659852 | \n",
+ " 104.303965 | \n",
+ " 122.272791 | \n",
+ " 9.834009e-02 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 3.001703e+06 | \n",
+ " 9.260000 | \n",
+ " 44221.000000 | \n",
+ " 0.002800 | \n",
+ " 0.461700 | \n",
+ " 0.010000 | \n",
+ " 0.010000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 4.540000e-07 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 3.701855e+06 | \n",
+ " 21.320000 | \n",
+ " 59976.000000 | \n",
+ " 0.304500 | \n",
+ " 1.294000 | \n",
+ " 4.420000 | \n",
+ " 80.380000 | \n",
+ " 93.357500 | \n",
+ " 49.480000 | \n",
+ " 1.280000e-02 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 3.843806e+06 | \n",
+ " 23.800000 | \n",
+ " 60400.000000 | \n",
+ " 0.451700 | \n",
+ " 1.693000 | \n",
+ " 8.490000 | \n",
+ " 171.660000 | \n",
+ " 184.530000 | \n",
+ " 164.700000 | \n",
+ " 4.520000e-02 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 5.416717e+07 | \n",
+ " 25.600000 | \n",
+ " 60400.000000 | \n",
+ " 0.565000 | \n",
+ " 2.172000 | \n",
+ " 16.810000 | \n",
+ " 252.402500 | \n",
+ " 272.662500 | \n",
+ " 291.902500 | \n",
+ " 1.280000e-01 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 5.443990e+07 | \n",
+ " 33.580000 | \n",
+ " 60435.000000 | \n",
+ " 0.996200 | \n",
+ " 332.600000 | \n",
+ " 165.580000 | \n",
+ " 359.980000 | \n",
+ " 359.960000 | \n",
+ " 360.000000 | \n",
+ " 7.080000e-01 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " spkid H epoch_mjd e a \\\n",
+ "count 3.487600e+04 34872.000000 34876.000000 34876.000000 34876.000000 \n",
+ "mean 2.359880e+07 23.486083 59701.479986 0.437249 1.764192 \n",
+ "std 2.362820e+07 2.894608 1572.136162 0.176989 2.117845 \n",
+ "min 3.001703e+06 9.260000 44221.000000 0.002800 0.461700 \n",
+ "25% 3.701855e+06 21.320000 59976.000000 0.304500 1.294000 \n",
+ "50% 3.843806e+06 23.800000 60400.000000 0.451700 1.693000 \n",
+ "75% 5.416717e+07 25.600000 60400.000000 0.565000 2.172000 \n",
+ "max 5.443990e+07 33.580000 60435.000000 0.996200 332.600000 \n",
+ "\n",
+ " i om w ma moid \n",
+ "count 34876.000000 34876.000000 34876.000000 34876.000000 3.487600e+04 \n",
+ "mean 12.005071 171.924096 182.571561 171.755088 8.552434e-02 \n",
+ "std 10.694688 103.659852 104.303965 122.272791 9.834009e-02 \n",
+ "min 0.010000 0.010000 0.000000 0.000000 4.540000e-07 \n",
+ "25% 4.420000 80.380000 93.357500 49.480000 1.280000e-02 \n",
+ "50% 8.490000 171.660000 184.530000 164.700000 4.520000e-02 \n",
+ "75% 16.810000 252.402500 272.662500 291.902500 1.280000e-01 \n",
+ "max 165.580000 359.980000 359.960000 360.000000 7.080000e-01 "
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "292a8389-dcdb-4381-a547-0426c56b8fb0",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " spkid | \n",
+ " pha | \n",
+ " H | \n",
+ " epoch_mjd | \n",
+ " e | \n",
+ " a | \n",
+ " i | \n",
+ " om | \n",
+ " w | \n",
+ " ma | \n",
+ " moid | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 3510 | \n",
+ " 3024715 | \n",
+ " Y | \n",
+ " 17.69 | \n",
+ " 60400 | \n",
+ " 0.9480 | \n",
+ " 17.800 | \n",
+ " 19.67 | \n",
+ " 48.69 | \n",
+ " 333.32 | \n",
+ " 117.04 | \n",
+ " 0.0111 | \n",
+ "
\n",
+ " \n",
+ " 6483 | \n",
+ " 3434371 | \n",
+ " N | \n",
+ " 24.11 | \n",
+ " 54767 | \n",
+ " 0.9060 | \n",
+ " 7.315 | \n",
+ " 32.39 | \n",
+ " 213.81 | \n",
+ " 263.82 | \n",
+ " 357.31 | \n",
+ " 0.1090 | \n",
+ "
\n",
+ " \n",
+ " 11522 | \n",
+ " 3683246 | \n",
+ " N | \n",
+ " 20.00 | \n",
+ " 60400 | \n",
+ " 0.9407 | \n",
+ " 21.440 | \n",
+ " 93.63 | \n",
+ " 338.57 | \n",
+ " 311.75 | \n",
+ " 35.25 | \n",
+ " 0.4590 | \n",
+ "
\n",
+ " \n",
+ " 15455 | \n",
+ " 3766081 | \n",
+ " N | \n",
+ " 16.27 | \n",
+ " 57742 | \n",
+ " 0.9919 | \n",
+ " 153.200 | \n",
+ " 145.50 | \n",
+ " 165.97 | \n",
+ " 77.94 | \n",
+ " 360.00 | \n",
+ " 0.5970 | \n",
+ "
\n",
+ " \n",
+ " 17114 | \n",
+ " 3799259 | \n",
+ " N | \n",
+ " 21.20 | \n",
+ " 60400 | \n",
+ " 0.9962 | \n",
+ " 332.600 | \n",
+ " 108.34 | \n",
+ " 219.67 | \n",
+ " 151.26 | \n",
+ " 0.39 | \n",
+ " 0.3330 | \n",
+ "
\n",
+ " \n",
+ " 19835 | \n",
+ " 3840692 | \n",
+ " N | \n",
+ " 22.61 | \n",
+ " 60400 | \n",
+ " 0.9889 | \n",
+ " 96.930 | \n",
+ " 139.83 | \n",
+ " 340.62 | \n",
+ " 193.09 | \n",
+ " 1.90 | \n",
+ " 0.1010 | \n",
+ "
\n",
+ " \n",
+ " 20634 | \n",
+ " 3843547 | \n",
+ " N | \n",
+ " 18.68 | \n",
+ " 60400 | \n",
+ " 0.7984 | \n",
+ " 5.774 | \n",
+ " 10.98 | \n",
+ " 348.98 | \n",
+ " 57.12 | \n",
+ " 114.87 | \n",
+ " 0.2340 | \n",
+ "
\n",
+ " \n",
+ " 20682 | \n",
+ " 3843840 | \n",
+ " N | \n",
+ " 18.17 | \n",
+ " 58729 | \n",
+ " 0.9789 | \n",
+ " 59.680 | \n",
+ " 159.03 | \n",
+ " 187.95 | \n",
+ " 176.27 | \n",
+ " 0.09 | \n",
+ " 0.2580 | \n",
+ "
\n",
+ " \n",
+ " 20698 | \n",
+ " 3843628 | \n",
+ " N | \n",
+ " 21.20 | \n",
+ " 60400 | \n",
+ " 0.7734 | \n",
+ " 5.693 | \n",
+ " 13.57 | \n",
+ " 252.31 | \n",
+ " 75.55 | \n",
+ " 122.88 | \n",
+ " 0.3770 | \n",
+ "
\n",
+ " \n",
+ " 20742 | \n",
+ " 3843716 | \n",
+ " N | \n",
+ " 20.05 | \n",
+ " 60400 | \n",
+ " 0.7984 | \n",
+ " 5.775 | \n",
+ " 10.96 | \n",
+ " 348.94 | \n",
+ " 57.16 | \n",
+ " 114.83 | \n",
+ " 0.2330 | \n",
+ "
\n",
+ " \n",
+ " 22110 | \n",
+ " 3989287 | \n",
+ " N | \n",
+ " 18.12 | \n",
+ " 60400 | \n",
+ " 0.9213 | \n",
+ " 7.701 | \n",
+ " 165.58 | \n",
+ " 105.87 | \n",
+ " 57.80 | \n",
+ " 66.15 | \n",
+ " 0.0817 | \n",
+ "
\n",
+ " \n",
+ " 28090 | \n",
+ " 54240416 | \n",
+ " N | \n",
+ " 18.61 | \n",
+ " 60400 | \n",
+ " 0.8883 | \n",
+ " 9.983 | \n",
+ " 4.72 | \n",
+ " 77.69 | \n",
+ " 298.05 | \n",
+ " 26.08 | \n",
+ " 0.1390 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " spkid pha H epoch_mjd e a i om \\\n",
+ "3510 3024715 Y 17.69 60400 0.9480 17.800 19.67 48.69 \n",
+ "6483 3434371 N 24.11 54767 0.9060 7.315 32.39 213.81 \n",
+ "11522 3683246 N 20.00 60400 0.9407 21.440 93.63 338.57 \n",
+ "15455 3766081 N 16.27 57742 0.9919 153.200 145.50 165.97 \n",
+ "17114 3799259 N 21.20 60400 0.9962 332.600 108.34 219.67 \n",
+ "19835 3840692 N 22.61 60400 0.9889 96.930 139.83 340.62 \n",
+ "20634 3843547 N 18.68 60400 0.7984 5.774 10.98 348.98 \n",
+ "20682 3843840 N 18.17 58729 0.9789 59.680 159.03 187.95 \n",
+ "20698 3843628 N 21.20 60400 0.7734 5.693 13.57 252.31 \n",
+ "20742 3843716 N 20.05 60400 0.7984 5.775 10.96 348.94 \n",
+ "22110 3989287 N 18.12 60400 0.9213 7.701 165.58 105.87 \n",
+ "28090 54240416 N 18.61 60400 0.8883 9.983 4.72 77.69 \n",
+ "\n",
+ " w ma moid \n",
+ "3510 333.32 117.04 0.0111 \n",
+ "6483 263.82 357.31 0.1090 \n",
+ "11522 311.75 35.25 0.4590 \n",
+ "15455 77.94 360.00 0.5970 \n",
+ "17114 151.26 0.39 0.3330 \n",
+ "19835 193.09 1.90 0.1010 \n",
+ "20634 57.12 114.87 0.2340 \n",
+ "20682 176.27 0.09 0.2580 \n",
+ "20698 75.55 122.88 0.3770 \n",
+ "20742 57.16 114.83 0.2330 \n",
+ "22110 57.80 66.15 0.0817 \n",
+ "28090 298.05 26.08 0.1390 "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Show the rows whose 'a' value is above 5\n",
+ "df.loc[df['a'].gt(5)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "11f926f6-ee7c-47f1-82ae-1c5299e219f3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(34864, 11)"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Remove every row with a>5 and renumber the index from 0.\n",
+ "#The mask is written as ~(a>5) so that a row with a missing 'a' would be kept,\n",
+ "#exactly as the previous index-based drop did.\n",
+ "df=df[~(df['a']>5)].reset_index(drop=True)\n",
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "d1203169-ac0c-4d03-9dac-ebd136cd7680",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " spkid | \n",
+ " H | \n",
+ " epoch_mjd | \n",
+ " e | \n",
+ " a | \n",
+ " i | \n",
+ " om | \n",
+ " w | \n",
+ " ma | \n",
+ " moid | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 3.486400e+04 | \n",
+ " 34860.000000 | \n",
+ " 34864.000000 | \n",
+ " 34864.000000 | \n",
+ " 34864.000000 | \n",
+ " 34864.000000 | \n",
+ " 34864.000000 | \n",
+ " 34864.000000 | \n",
+ " 34864.000000 | \n",
+ " 3.486400e+04 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 2.360419e+07 | \n",
+ " 23.487377 | \n",
+ " 59701.525298 | \n",
+ " 0.437086 | \n",
+ " 1.744036 | \n",
+ " 11.983268 | \n",
+ " 171.907288 | \n",
+ " 182.575511 | \n",
+ " 171.776436 | \n",
+ " 8.546965e-02 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 2.362906e+07 | \n",
+ " 2.893991 | \n",
+ " 1572.100889 | \n",
+ " 0.176795 | \n",
+ " 0.556275 | \n",
+ " 10.567228 | \n",
+ " 103.655813 | \n",
+ " 104.304053 | \n",
+ " 122.267979 | \n",
+ " 9.826509e-02 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 3.001703e+06 | \n",
+ " 9.260000 | \n",
+ " 44221.000000 | \n",
+ " 0.002800 | \n",
+ " 0.461700 | \n",
+ " 0.010000 | \n",
+ " 0.010000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 4.540000e-07 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 3.701855e+06 | \n",
+ " 21.330000 | \n",
+ " 59976.000000 | \n",
+ " 0.304400 | \n",
+ " 1.294000 | \n",
+ " 4.420000 | \n",
+ " 80.377500 | \n",
+ " 93.367500 | \n",
+ " 49.525000 | \n",
+ " 1.280000e-02 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 3.843818e+06 | \n",
+ " 23.800000 | \n",
+ " 60400.000000 | \n",
+ " 0.451600 | \n",
+ " 1.692000 | \n",
+ " 8.490000 | \n",
+ " 171.650000 | \n",
+ " 184.545000 | \n",
+ " 164.730000 | \n",
+ " 4.520000e-02 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 5.416727e+07 | \n",
+ " 25.600000 | \n",
+ " 60400.000000 | \n",
+ " 0.564900 | \n",
+ " 2.171000 | \n",
+ " 16.802500 | \n",
+ " 252.400000 | \n",
+ " 272.662500 | \n",
+ " 291.915000 | \n",
+ " 1.280000e-01 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 5.443990e+07 | \n",
+ " 33.580000 | \n",
+ " 60435.000000 | \n",
+ " 0.970300 | \n",
+ " 4.816000 | \n",
+ " 154.350000 | \n",
+ " 359.980000 | \n",
+ " 359.960000 | \n",
+ " 360.000000 | \n",
+ " 7.080000e-01 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " spkid H epoch_mjd e a \\\n",
+ "count 3.486400e+04 34860.000000 34864.000000 34864.000000 34864.000000 \n",
+ "mean 2.360419e+07 23.487377 59701.525298 0.437086 1.744036 \n",
+ "std 2.362906e+07 2.893991 1572.100889 0.176795 0.556275 \n",
+ "min 3.001703e+06 9.260000 44221.000000 0.002800 0.461700 \n",
+ "25% 3.701855e+06 21.330000 59976.000000 0.304400 1.294000 \n",
+ "50% 3.843818e+06 23.800000 60400.000000 0.451600 1.692000 \n",
+ "75% 5.416727e+07 25.600000 60400.000000 0.564900 2.171000 \n",
+ "max 5.443990e+07 33.580000 60435.000000 0.970300 4.816000 \n",
+ "\n",
+ " i om w ma moid \n",
+ "count 34864.000000 34864.000000 34864.000000 34864.000000 3.486400e+04 \n",
+ "mean 11.983268 171.907288 182.575511 171.776436 8.546965e-02 \n",
+ "std 10.567228 103.655813 104.304053 122.267979 9.826509e-02 \n",
+ "min 0.010000 0.010000 0.000000 0.000000 4.540000e-07 \n",
+ "25% 4.420000 80.377500 93.367500 49.525000 1.280000e-02 \n",
+ "50% 8.490000 171.650000 184.545000 164.730000 4.520000e-02 \n",
+ "75% 16.802500 252.400000 272.662500 291.915000 1.280000e-01 \n",
+ "max 154.350000 359.980000 359.960000 360.000000 7.080000e-01 "
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "17be8338-6c23-4cc9-ae08-d1ab7f705a79",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of PHAs: 2421\n",
+ "Percentage of PHAs: 6.944125745754933\n"
+ ]
+ }
+ ],
+ "source": [
+ "#Tally the rows flagged pha=='Y' once, then report the count and its share of all rows\n",
+ "n_pha=np.count_nonzero(df['pha'].eq('Y'))\n",
+ "print(\"Number of PHAs:\", n_pha)\n",
+ "print(\"Percentage of PHAs:\", 100*n_pha/len(df))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "cafcc865-5058-4a84-932b-8dcf95a5dcd4",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of asteroids with MOID=0: 0\n",
+ "Number of asteroids with MOID < 0.05: 18275\n"
+ ]
+ }
+ ],
+ "source": [
+ "#Report how many rows have moid exactly 0 and how many have moid below 0.05\n",
+ "print(\"Number of asteroids with MOID=0:\", int(df['moid'].eq(0.0).sum()))\n",
+ "print(\"Number of asteroids with MOID < 0.05:\", int(df['moid'].lt(0.05).sum()))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "cb2545a2-9961-4118-a2e3-94647adc4d85",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "