from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import linear_kernel
# Demo: build tf-idf vectors for a small corpus and compare two ways of
# computing the pairwise document-similarity matrix.

# Initialize an instance of the tf-idf vectorizer
tfidf_vectorizer = TfidfVectorizer()

# Example corpus: one document per string
corpus = [
    'The sun is the largest celestial body in the solar system',
    'The solar system consists of the sun and eight revolving planets',
    'Ra was the Egyptian Sun God',
    'The Pyramids were the pinnacle of Egyptian architecture',
    'The quick brown fox jumps over the lazy dog',
]

# Generate the tf-idf vectors for the corpus (one row per document)
tfidf_matrix = tfidf_vectorizer.fit_transform(corpus)

# Compute the pairwise cosine similarity matrix between all documents
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Compute the same matrix using linear_kernel (plain dot products).
# TfidfVectorizer L2-normalizes each row by default (norm='l2'), so the dot
# product of two rows already equals their cosine similarity; linear_kernel
# simply skips the redundant normalization step.
cosine_sim2 = linear_kernel(tfidf_matrix, tfidf_matrix)

# Print both matrices; they should agree up to floating-point rounding
print(cosine_sim)
print(cosine_sim2)