from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
# Small in-memory corpus used to demonstrate bag-of-words term counting.
documents = [
    "This is the first document.",
    "This document is the second document.",
    "And this is the third one.",
    "Is this the first document?",
]

# Vectorizer configuration:
#   max_features=100      -> cap on the vocabulary size
#   stop_words='english'  -> drop scikit-learn's built-in English stop words
#   min_df=0.1 / max_df=0.9 -> floats are document-frequency proportions, so
#                              terms seen in under 10% or over 90% of the
#                              documents are excluded from the vocabulary
vectorizer = CountVectorizer(
    max_features=100,
    stop_words='english',
    min_df=0.1,
    max_df=0.9,
)

# Learn the vocabulary and build the document-term counts in one call;
# the result is a sparse matrix with one row per document.
X = vectorizer.fit_transform(documents)

# Vocabulary terms, used below to label the columns of the count matrix.
feature_names = vectorizer.get_feature_names_out()

# Densify the counts and label the columns so the table is readable.
df = pd.DataFrame(data=X.toarray(), columns=feature_names)

# Show the resulting document-term table.
print(df)