# Two toy documents that serve as the corpus for the examples in this file.
d1 = "the best Italian restaurant enjoy the best pasta"
d2 = "american restaurant enjoy the best hamburger"
# Dependency: the `textmining` module imported below is installed with
# `pip install textmining3` (run it in a shell, or as `%pip install textmining3`
# in a notebook cell). A bare `pip install ...` line is not valid Python and
# would stop this file from parsing, so it is kept here as a comment only.
# Term-document matrix
import textmining

# Feed both documents into a textmining matrix builder.
matrix_builder = textmining.TermDocumentMatrix()
for text in (d1, d2):
    matrix_builder.add_doc(text)

# Convert to a DataFrame and keep it under the name `tdm`.
# NOTE(review): cutoff=0 presumably keeps every term regardless of how many
# documents contain it -- confirm against the textmining3 documentation.
tdm = matrix_builder.to_df(cutoff=0)
# Bare expression: shows the frame when this is the last line of a notebook
# cell; it has no visible effect when run as a plain script.
tdm
# Inverted index: maps each whitespace-separated term to the set of indices
# (positions in `docs`) of the documents in which that term occurs.
docs = [d1, d2]
inverted_index = {}
for i, doc in enumerate(docs):
    # Terms come straight from str.split(), so they are case-sensitive and
    # are not normalised in any way.
    for term in doc.split():
        # setdefault creates the empty posting set the first time a term is
        # seen, then the current document index is added to it.
        inverted_index.setdefault(term, set()).add(i)
# Bare expression: shows the index when this is the last line of a notebook
# cell; it has no visible effect when run as a plain script.
inverted_index
# TF-IDF
from sklearn.feature_extraction.text import TfidfVectorizer

# Fit the vectorizer on the corpus and weight the same documents in one call.
tfidf = TfidfVectorizer()
result = tfidf.fit_transform(docs)

# Print the fitted vocabulary_ mapping first, then the transformed result,
# each on its own line.
print(tfidf.vocabulary_, result, sep="\n")
# Cosine similarity
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between the rows of `tdm`, the term-document
# DataFrame built earlier in this file (not the TF-IDF `result`).
similarity_matrix = cosine_similarity(tdm)
print(similarity_matrix)