-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdatabaseImport.py
58 lines (50 loc) · 1.7 KB
/
databaseImport.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import pandas as pd
import mysql.connector
# Replace the following variables with your MySQL database connection details
mysql_host = "127.0.0.1"
mysql_port = 3306
mysql_user = "root"
mysql_password = "root"
mysql_database = "movies"
# Read data from data.tsv using pandas
data_file = 'data.tsv'
df = pd.read_csv(data_file, sep='\t')
df = df.loc[df['startYear'] > "2000"]
df = df.loc[df['startYear'] != "\\N"]
df = df.loc[df['titleType']== "movie"]
df = df.head(1000)
#Connect to the MySQL database
try:
connection = mysql.connector.connect(
host=mysql_host,
port=mysql_port,
user=mysql_user,
password=mysql_password,
database=mysql_database
)
cursor = connection.cursor()
# Insert data into the 'movies' table
for index, row in df.iterrows():
# Replace the column names in the 'INSERT INTO' query with your actual column names
sql = "INSERT INTO movies_movie (tconst, titleType, primaryTitle, originalTitle, isAdult, startYear, endYear, runtimeMinutes, genres) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
values = (
row['tconst'],
row['titleType'],
row['primaryTitle'],
row['originalTitle'],
row['isAdult'],
row['startYear'],
row['endYear'],
row['runtimeMinutes'],
row['genres']
)
cursor.execute(sql, values)
connection.commit()
print("Data inserted successfully!")
except mysql.connector.Error as error:
print("Error while connecting to MySQL or inserting data:", error)
finally:
if connection.is_connected():
cursor.close()
connection.close()
print("MySQL connection is closed.")