"""Visualize sentence embeddings from a local Ollama server with t-SNE.

The script requests one embedding per sentence from the Ollama embeddings
endpoint, projects the vectors to two dimensions with t-SNE, and shows a
scatter plot in which each point is labelled with its sentence number.
"""

import json
import sys

import matplotlib.pyplot as plt
import numpy as np
import requests
from sklearn.manifold import TSNE

# --- Configuration ---
OLLAMA_URL = "http://localhost:11434/api/embeddings"
MODEL_NAME = "nomic-embed-text"  # Make sure you have pulled this model
# Requests never times out unless told to; without this a stalled server
# would hang the script indefinitely.
REQUEST_TIMEOUT_SECONDS = 60

# --- Data: Sentences to Analyze ---
sentences = [
    "The quick brown fox jumps over the lazy dog.",
    "A fast, dark-colored mammal leaps above a sleepy canine.",
    "Apples and oranges are common fruits.",
    "Bananas grow in bunches.",
    "The weather today is sunny and warm.",
    "Expect rain and clouds tomorrow.",
    "Python is a popular programming language.",
    "Software development requires careful planning.",
]


def _fail(*lines):
    """Print the given message lines and terminate with exit status 1."""
    for line in lines:
        print(line)
    # sys.exit (not the interactive-only ``exit`` helper) with a non-zero
    # code, so shells and callers can tell the run failed.
    sys.exit(1)


def fetch_embedding(sentence):
    """Return the embedding Ollama reports for *sentence*.

    Returns the value stored under the ``"embedding"`` key of the JSON
    response, or ``None`` when the response is not a JSON object or has no
    such key. Terminates the process with status 1 on a transport/HTTP
    error or when the response body is not valid JSON.
    """
    payload = {"model": MODEL_NAME, "prompt": sentence}

    try:
        response = requests.post(
            OLLAMA_URL, json=payload, timeout=REQUEST_TIMEOUT_SECONDS
        )
        response.raise_for_status()  # Raise for bad status codes (4xx or 5xx)
    except requests.exceptions.RequestException as e:
        _fail(
            f"\nError connecting to Ollama or during API request: {e}",
            "Ensure Ollama is running and the model name is correct.",
        )

    # Decoding is handled in its own try block: the error raised by
    # response.json() is also a RequestException in current Requests
    # releases, so a shared try block would report it as a connection
    # problem. ValueError is the common base of the decode errors.
    try:
        response_data = response.json()
    except ValueError as e:
        _fail(
            f"\nError decoding JSON response: {e}",
            f"Received text: {response.text}",
        )

    if not isinstance(response_data, dict):
        return None
    return response_data.get("embedding")


def get_embeddings(texts):
    """Fetch an embedding for every entry of *texts* and stack them.

    Returns a NumPy array with one row per text. Terminates the process
    with status 1 if any embedding is missing or empty.
    """
    print(f"Getting embeddings using model: {MODEL_NAME}...")
    embeddings = []
    for i, sentence in enumerate(texts):
        print(f"  Processing sentence {i+1}/{len(texts)}: '{sentence[:30]}...'")
        embeddings.append(fetch_embedding(sentence))

    # ``not e`` rejects both a missing (None) and an empty embedding.
    if not embeddings or any(not e for e in embeddings):
        _fail("\nError: Failed to retrieve valid embeddings for some sentences.")

    embeddings_array = np.array(embeddings)
    print(
        f"\nSuccessfully got {embeddings_array.shape[0]} embeddings "
        f"with dimension {embeddings_array.shape[1]}."
    )
    return embeddings_array


def reduce_to_2d(embeddings_array):
    """Project *embeddings_array* to two dimensions with t-SNE.

    Terminates the process with status 1 when fewer than two samples are
    available.
    """
    print("Reducing dimensionality using t-SNE...")
    # Perplexity must be less than the number of samples.
    perplexity_value = min(30, len(embeddings_array) - 1)
    if perplexity_value <= 0:
        _fail("\nError: Need at least 2 sentences for t-SNE.")

    tsne = TSNE(
        n_components=2,
        random_state=42,
        perplexity=perplexity_value,
        init='pca',
        learning_rate='auto',
    )
    reduced_embeddings = tsne.fit_transform(embeddings_array)
    print("Dimensionality reduction complete.")
    return reduced_embeddings


def plot_embeddings(reduced_embeddings, texts):
    """Show a scatter plot of the 2D points with a numbered sentence legend."""
    print("Plotting results...")
    plt.figure(figsize=(10, 8))
    plt.scatter(reduced_embeddings[:, 0], reduced_embeddings[:, 1])

    # Label each point with its 1-based sentence number, slightly above it.
    for label, (x, y) in enumerate(reduced_embeddings, start=1):
        plt.annotate(
            f"{label}",
            (x, y),
            textcoords="offset points",
            xytext=(0, 5),
            ha='center',
            fontsize=8,
        )

    plt.title(f'2D t-SNE visualization of sentence embeddings ({MODEL_NAME})')
    plt.xlabel('t-SNE Component 1')
    plt.ylabel('t-SNE Component 2')
    plt.grid(True, linestyle='--', alpha=0.6)
    # Number -> sentence legend in the bottom-left corner of the figure.
    plt.figtext(
        0.01,
        0.01,
        "\n".join([f"{i+1}: {s[:60]}..." for i, s in enumerate(texts)]),
        fontsize=7,
    )
    # Reserve the bottom 10% of the figure for the legend text.
    plt.tight_layout(rect=[0, 0.1, 1, 1])
    plt.show()


def main():
    """Run the pipeline: fetch embeddings, reduce with t-SNE, plot."""
    embeddings_array = get_embeddings(sentences)
    reduced_embeddings = reduce_to_2d(embeddings_array)
    plot_embeddings(reduced_embeddings, sentences)
    print("Done.")


if __name__ == "__main__":
    main()