duplicate name in index/columns: (err)
Package:
pandas
30911

Exception Class:
ValueError
Raise code
def insert_data(self):
if self.index is not None:
temp = self.frame.copy()
temp.index.names = self.index
try:
temp.reset_index(inplace=True)
except ValueError as err:
raise ValueError(f"duplicate name in index/columns: {err}") from err
else:
temp = self.frame
column_names = list(map(str, temp.columns))
ncols = len(column_names)
data_list = [None] * ncols
Links to the raise (1)
https://github.com/pandas-dev/pandas/blob/b3e335254f46a526ee3ce9bb757eac4011d9d1fe/pandas/io/sql.py#L905Ways to fix
SQL database in Pandas allows us to map Pandas tables to SQL tables. When we are creating DataFrame, need to be more accurate on columns name.
Code to reproduce the error:
import mysql.connector as myc
import pandas.io.sql as sql
import pandas as pd
df = pd.DataFrame([('lion', 389.0),
('bird', 24.0),
('mammal', 80.5),
('cammal', 30.5)],
index=['falcon', 'cammal', 'lion', 'monkey'],
columns=('index', 'level_0'))
# We are connecting to our database
conn = myc.connect(host="your host number",
user="Dabatase user name",
passwd="Database password",
port=your_port_number,
database="Database name")
# Creating database
db = sql.SQLDatabase(conn)
# Creating table
table = sql.SQLTable("test_type", db, frame=df)
keys,data_list = table.insert_data()
print(keys)
If we choose 'index' as a column name, need to accurately select the next column's name. Because When we are insert_data, DataFrame's index names become to value that return with function.
def insert_data(self):
if self.index is not None:
temp = self.frame.copy()
temp.index.names = self.index
try:
temp.reset_index(inplace=True)
except ValueError as err:
raise ValueError(f"duplicate name in index/columns: {err}") from err
else:
temp = self.frame
That is the whole index name. It returns at the end 'level_0'. Because code is checking index_label if it's None and 'index' is in column name and index name is None. As a result it can't reset index of DataFrame.
def _index_name(self, index, index_label):
# for writing: index=True to include index in sql table
if index is True:
nlevels = self.frame.index.nlevels
# if index_label is specified, set this as index name(s)
if index_label is not None:
if not isinstance(index_label, list):
index_label = [index_label]
if len(index_label) != nlevels:
raise ValueError(
"Length of 'index_label' should match number of "
f"levels, which is {nlevels}"
)
else:
return index_label
# return the used column labels for the index columns
if (
nlevels == 1
and "index" not in self.frame.columns
and self.frame.index.name is None
):
return ["index"]
else:
return [
l if l is not None else f"level_{i}"
for i, l in enumerate(self.frame.index.names)
]
So, if we define the column name index and level_0, we will get an error. To fix that, we need to define column name of DataFrame should be more accurate.
Fix code:
import mysql.connector as myc
import pandas.io.sql as sql
import pandas as pd
df = pd.DataFrame([('lion', 389.0),
('bird', 24.0),
('mammal', 80.5),
('cammal', 30.5)],
index=['falcon', 'cammal', 'lion', 'monkey'],
columns=('index', 'animals'))
# We are connecting to our database
conn = myc.connect(host="your host number",
user="Dabatase user name",
passwd="Database password",
port=your_port_number,
database="Database name")
# Creating database
db = sql.SQLDatabase(conn)
# Creating table
table = sql.SQLTable("test_type", db, frame=df)
keys,data_list = table.insert_data()
print(keys)
Add a possible fix
Please authorize to post fix