# --- Snippet: drop duplicate rows based on selected columns ---
# Rows count as duplicates when they agree on both of these columns.
key_columns = ['Column1', 'Column2']
# Keep the first row of every duplicate group and drop the later ones.
result_df = df.drop_duplicates(subset=key_columns, keep='first')
print(result_df)
# --- Snippet: drop_duplicates() on all columns, one column, or a pair of columns ---
import pandas as pd
# Drop rows that repeat an earlier row in every column (the first occurrence is kept)
df = df.drop_duplicates(keep="first")
# Drop rows whose value in one specific column has already been seen
df = df.drop_duplicates(subset=["column"])
# Drop rows whose ("column", "column2") pair has already been seen
df = df.drop_duplicates(subset=["column", "column2"])
# Display the de-duplicated DataFrame
print(df)
# --- Snippet: quick reference for the keep / subset / inplace options ---
# Quick examples of DataFrame.drop_duplicates()
# Default call: rows are compared on all columns and the first occurrence of each duplicate is kept
df2 = df.drop_duplicates()
# Same result as above, with the default keep='first' spelled out explicitly
df2 = df.drop_duplicates(keep='first')
# Keep the last occurrence of each duplicated row instead of the first
df2 = df.drop_duplicates( keep='last')
# keep=False drops every row that has a duplicate; no copy of a duplicated row survives
df2 = df.drop_duplicates(keep=False)
# Compare rows on the "Courses" and "Fee" columns only, again dropping every duplicated row
df2 = df.drop_duplicates(subset=["Courses", "Fee"], keep=False)
# Drop duplicate rows in place: df itself is modified and the call returns None
df.drop_duplicates(inplace=True)
# Case-insensitive de-duplication: every column is cast to str and lower-cased before comparing.
# NOTE: df2 therefore holds the lower-cased string values, not the original ones.
df2 = df.apply(lambda x: x.astype(str).str.lower()).drop_duplicates(subset=['Courses', 'Fee'], keep='first')
# --- Snippet: remove duplicates from a plain Python list ---
def remove_dupiclates(list_):
    """Return a new list holding the items of ``list_`` without duplicates.

    The first occurrence of every item is kept and the original order is
    preserved. The input is not modified.

    Args:
        list_: Any iterable; its items may be hashable or unhashable.

    Returns:
        A new list containing each distinct item once, in first-seen order.
    """
    # Materialise once so a one-shot iterator is not half-consumed if the
    # fast path below fails part-way through.
    items = list(list_)
    try:
        # Fast path: dicts keep insertion order, so this de-duplicates in O(n).
        return list(dict.fromkeys(items))
    except TypeError:
        # At least one item is unhashable (e.g. a list): fall back to the
        # equality-based scan, which is O(n^2) but works for any item type.
        new_list = []
        for a in items:
            if a not in new_list:
                new_list.append(a)
        return new_list
# --- Snippet: remove rows with a duplicated index, plus column-based alternatives ---
# Remove rows whose index label has already been seen (the first occurrence is kept).
# Index.duplicated() marks the repeats with True, so the mask is negated (~)
# to drop those rows instead of selecting them.
df = df[~df.index.duplicated(keep='first')]
# Other methods to remove duplicates
import pandas as pd
# Compare rows on all columns
df = df.drop_duplicates()
# Compare rows on a single column
df = df.drop_duplicates(subset = "column")
# Compare rows on a pair of columns
df = df.drop_duplicates(subset = ["column", "column2"])
# --- Snippet: remove repeated words from a line of input ---
# Read one line from stdin and split it into whitespace-separated tokens.
word = input().split()
# De-duplicate without mutating the list while iterating over it (the old
# remove()-inside-the-loop approach skipped elements, e.g. "a a a a" left
# two copies behind). Walking the tokens in reverse and reversing back keeps
# the LAST occurrence of each token, which is what removing the earlier
# copies was meant to achieve.
word = list(dict.fromkeys(reversed(word)))[::-1]
# --- Snippet: drop_duplicates() on a small sample DataFrame ---
# Sample data: six (Date, Time) rows, two of which repeat the row before them.
df = pd.DataFrame(
    [
        ("2022", "20:00"),
        ("2022", "20:00"),
        ("2021", "20:00"),
        ("2021", "21:00"),
        ("2020", "22:00"),
        ("2020", "22:00"),
    ],
    columns=["Date", "Time"],
)
# Returns a new frame without the repeated rows; df itself is left unchanged.
df.drop_duplicates()
# Result of the call above (index column omitted):
# Date Time
# 2022 20:00
# 2021 20:00
# 2021 21:00
# 2020 22:00
# --- Snippet: de-duplicate a list in place by sorting, or via set() ---
# De-duplicate mylist in place. Only requires the elements to be sortable and
# comparable with ==, so it also works for unhashable elements.
if mylist:
    # Sorting places equal elements next to each other.
    mylist.sort()
    last = mylist[-1]
    # Walk backwards so that deleting index i never shifts an element that
    # still has to be visited.
    for i in range(len(mylist)-2, -1, -1):
        if last == mylist[i]:
            del mylist[i]
        else:
            last = mylist[i]
# Quicker if all elements are hashable (note: set() does not preserve order):
mylist = list(set(mylist))