import os

import numpy as np
import openai
from sklearn.metrics.pairwise import cosine_similarity
# Set up OpenAI API credentials.
# Prefer the OPENAI_API_KEY environment variable so a real secret never has to
# be written into this file; fall back to the original placeholder otherwise.
openai.api_key = os.environ.get("OPENAI_API_KEY", 'YOUR_API_KEY')

# Sample request: embed one fixed string and print the raw API response.
response = openai.Embedding.create(
    model="text-embedding-ada-002",
    input="The food was delicious and the waiter...",
)
print(response)
# New sentence whose closest match among `sentences` is looked up
new_sentence = "How does sentence embedding work?"

# Sentences to be embedded and sorted
sentences = [
    "How to use ChatGPT for sentence embedding?",
    "What are the applications of sentence embedding?",
    "Can you provide examples of sentence embedding?",
    "What is the best approach for sentence embedding?",
]
# Generate the embeddings for all sentences.
# The embeddings endpoint accepts a list of inputs, so every sentence is sent
# in a single request instead of one network round trip per sentence.
response = openai.Embedding.create(
    model="text-embedding-ada-002",
    input=sentences,
)
# Each returned item records the position of its input; sort on it so that
# embeddings[i] always lines up with sentences[i].
embeddings = [
    item.embedding
    for item in sorted(response.data, key=lambda item: item.index)
]
# Turn the list of embeddings into a numpy array
embeddings = np.array(embeddings)

# Cosine similarity between every pair of embeddings
similarity_matrix = cosine_similarity(embeddings)

# Column 0 holds each sentence's similarity to the first sentence;
# rank the sentences on it, most similar first.
first_sentence_scores = similarity_matrix[:, 0]
sorted_indices = np.argsort(first_sentence_scores)[::-1]
sorted_sentences = [sentences[position] for position in sorted_indices]
# Embed the new sentence
response = openai.Embedding.create(
    model="text-embedding-ada-002",
    input=new_sentence,
)
new_embedding = response.data[0].embedding

# Compare the new embedding against the existing ones. The single vector is
# wrapped in a list to form a one-row input, and row 0 of the result holds
# its score against each existing embedding.
similarity_scores = cosine_similarity([new_embedding], embeddings)[0]

# Indices ordered from the highest score to the lowest
sorted_indices = np.argsort(similarity_scores)[::-1]

# The first index points at the most similar sentence
best_index = sorted_indices[0]
most_similar_sentence = sentences[best_index]
# Print the sorted sentences, one per line, followed by the sentence that is
# most similar to the new sentence
print("Sorted Sentences:")
for ranked_sentence in sorted_sentences:
    print(ranked_sentence)

print("\nMost Similar Sentence:")
print(most_similar_sentence)