import marimo __generated_with = "0.13.7" app = marimo.App(width="medium") @app.cell def _(): import requests import json import numpy as np from sklearn.manifold import TSNE import matplotlib.pyplot as plt import umap import pandas as pd import plotly.express as px return json, np, pd, px, requests, umap @app.cell def _(): # --- Configuration --- OLLAMA_URL = "http://localhost:11434/api/embeddings" MODEL_NAME = "nomic-embed-text" # Make sure you have pulled this model return MODEL_NAME, OLLAMA_URL @app.cell def _(): # --- Data: Sentences to Analyze (Focus on Sentiment & Topics) --- sentences = [ # Positive Sentiments "I had an absolutely wonderful time on vacation!", # Travel, Positive "This is the best pizza I've ever tasted, truly amazing.",# Food, Positive "The new software update significantly improved performance.",# Tech, Positive "She was overjoyed to receive the award.", # Emotion, Positive "What a beautiful, sunny day for a picnic!", # Weather/Activity, Positive # Negative Sentiments "The airline lost my luggage, ruining the start of my trip.", # Travel, Negative "I found the meal to be bland and overpriced.", # Food, Negative "Debugging this legacy code is incredibly frustrating.", # Tech, Negative "He felt heartbroken and betrayed after the argument.", # Emotion, Negative "The constant rain made the whole weekend gloomy.", # Weather/Activity, Negative # Neutral/Objective Sentiments "The train is scheduled to depart at 3:00 PM.", # Travel, Neutral "The ingredients listed include flour, water, and salt.",# Food, Neutral "The system requires 8GB of RAM to operate.", # Tech, Neutral "Please file the report by the end of the day.", # Work/Instruction, Neutral "The weather report indicates a chance of showers tomorrow.",# Weather, Neutral ] return (sentences,) @app.cell def _(MODEL_NAME, OLLAMA_URL, json, np, requests, sentences): # --- 1. Get Embeddings from Ollama --- embeddings = [] print(f"Getting embeddings using model: {MODEL_NAME}...") for i, sentence in enumerate(sentences): print(f" Processing sentence {i+1}/{len(sentences)}: '{sentence[:30]}...'") try: payload = { "model": MODEL_NAME, "prompt": sentence } response = requests.post(OLLAMA_URL, json=payload) response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx) # Parse the JSON response line by line if streaming, or directly if not # Ollama's embedding API typically returns a single JSON object, not streamed response_data = response.json() embeddings.append(response_data.get("embedding")) except requests.exceptions.RequestException as e: print(f"\nError connecting to Ollama or during API request: {e}") print("Ensure Ollama is running and the model name is correct.") exit() except json.JSONDecodeError as e: print(f"\nError decoding JSON response: {e}") print(f"Received text: {response.text}") exit() except Exception as e: print(f"\nAn unexpected error occurred for sentence '{sentence}': {e}") exit() # Check if we got any embeddings if not embeddings or any(e is None for e in embeddings): print("\nError: Failed to retrieve valid embeddings for some sentences.") exit() embeddings_array = np.array(embeddings) print(f"\nSuccessfully got {embeddings_array.shape[0]} embeddings with dimension {embeddings_array.shape[1]}.") return (embeddings_array,) @app.cell def _(embeddings_array, umap): # --- 2. Dimensionality Reduction (UMAP) --- print("Reducing dimensionality using UMAP...") # Check if we have enough samples for default neighbors n_samples = embeddings_array.shape[0] if n_samples <= 1: print("\nError: Need at least 2 data points for UMAP.") exit() # Adjust n_neighbors if necessary - must be less than n_samples # Common defaults are 5-15. Lower values focus more on local structure. n_neighbors_value = min(15, n_samples - 1) if n_neighbors_value < 2: print(f"\nWarning: Small dataset (n={n_samples}). Setting UMAP n_neighbors to {n_samples - 1}.") n_neighbors_value = max(2, n_samples - 1) # UMAP needs at least 2 neighbors print(f" Using UMAP with n_neighbors={n_neighbors_value}") # Initialize UMAP # metric='cosine' is often recommended for high-dimensional text embeddings reducer = umap.UMAP( n_neighbors=n_neighbors_value, n_components=2, # Target dimension min_dist=0.1, # Controls how tightly points are packed metric='cosine', # Distance metric suitable for embeddings ) # Fit and transform the data reduced_embeddings = reducer.fit_transform(embeddings_array) print("Dimensionality reduction complete.") return (reduced_embeddings,) @app.cell def _(MODEL_NAME, pd, px, reduced_embeddings, sentences): # Create a Pandas DataFrame for easier plotting with Plotly Express df = pd.DataFrame({ 'x': reduced_embeddings[:, 0], 'y': reduced_embeddings[:, 1], 'sentence': sentences, 'index': [i + 1 for i in range(len(sentences))] # 1-based index for display }) # Create the interactive scatter plot fig = px.scatter( df, x='x', y='y', text='index', # Display the index number directly on the plot point hover_name='index', # Show index number prominently on hover hover_data={ # Configure what data appears on hover (tooltips) 'sentence': True, # Show the full sentence 'x': False, # Hide the x-coordinate from hover tooltip 'y': False, # Hide the y-coordinate 'index': False # Hide the index again (it's already in hover_name) }, title=f'Interactive 2D Visualization of sentence embeddings ({MODEL_NAME})', labels={'x': f'Component 1', 'y': f'Component 2'} # Axis labels ) # --- Optional Customizations --- # Adjust text label appearance fig.update_traces( textposition='top center', # Position text above the marker textfont_size=10 # Adjust font size of the index number ) # Adjust layout fig.update_layout( hovermode='closest', # Make hovering easier width=900, # Set plot width in pixels height=700, # Set plot height in pixels title_x=0.5 # Center the plot title ) # fig.update_layout(xaxis_visible=False, yaxis_visible=False) # Uncomment to hide axes # --- Show the plot --- # This will typically open the plot in your default web browser # or display it inline if you're in an environment like Jupyter Notebook/Lab. fig.show() print("Plot display initiated (check browser or output).") print("Done.") return if __name__ == "__main__": app.run()