Skip to content

Commit

Permalink
release dates?
Browse files Browse the repository at this point in the history
  • Loading branch information
fshowalter committed Dec 7, 2023
1 parent 7fbd0b9 commit 60bf8bc
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 152 deletions.
35 changes: 35 additions & 0 deletions movielog/repository/imdb_http.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import re
from dataclasses import dataclass, field
from datetime import datetime
from typing import Literal, Optional, get_args

import imdb
Expand Down Expand Up @@ -45,6 +47,7 @@ class TitlePage(object):
genres: list[str]
countries: list[str]
sound_mix: set[str]
release_date: str


def parse_roles(person: imdb.Person.Person) -> list[str]:
Expand Down Expand Up @@ -119,6 +122,37 @@ def build_name_credits_for_title_page(
return credits


def unknown_date(imdb_movie: imdb.Movie.Movie) -> str:
    """Build a placeholder release date that only carries the movie's year.

    Args:
        imdb_movie: Movie record; its "year" entry is read with item access,
            so a missing "year" propagates whatever error the record raises.

    Returns:
        The year followed by "-??-??" (for example "1994-??-??").
    """
    year = imdb_movie["year"]
    placeholder = "{0}-??-??".format(year)
    return placeholder


def parse_release_date(imdb_movie: imdb.Movie.Movie) -> str:
    """Derive a release date string from the movie's "original air date".

    The "original air date" value is matched as some text followed by
    whitespace and a parenthesized suffix; the suffix is treated as the
    country the date applies to (presumably values look like
    "24 Jun 1994 (USA)" -- confirm against the IMDb client's output).

    Args:
        imdb_movie: Movie record. Reads "original air date" and "countries"
            via ``get`` and, through ``unknown_date``, "year".

    Returns:
        * An ISO ``YYYY-MM-DD`` string when the date text is a full
          day-month-year date (abbreviated or full month name).
        * The ``unknown_date`` placeholder when the value is missing, has no
          parenthesized suffix, has an empty date part, or names a country
          other than the first entry of "countries".
        * The raw date text when it is not a day-month-year date.
    """
    re_match = re.search(r"(.*)\s\((.*)\)", imdb_movie.get("original air date", ""))

    if not re_match:
        return unknown_date(imdb_movie)

    # The pattern always defines exactly two groups, so both can be unpacked
    # directly; no group-count check is needed.
    imdb_date, date_country = re_match.groups()

    if not imdb_date:
        return unknown_date(imdb_movie)

    primary_country = next(iter(imdb_movie.get("countries", [])), None)

    # Only trust a date that belongs to the movie's first listed country.
    if date_country and date_country != primary_country:
        return unknown_date(imdb_movie)

    # Accept both abbreviated ("Jun") and full ("June") month names.
    # NOTE: strptime month-name matching follows the current locale.
    for date_format in ("%d %b %Y", "%d %B %Y"):
        try:
            return datetime.strptime(imdb_date, date_format).date().isoformat()
        except ValueError:
            continue

    # Not a full day-month-year date: hand the text back untouched.
    return imdb_date


def get_name_page(imdb_id: str) -> NamePage:
imdb_name_page = imdb_http.get_person(imdb_id[2:])

Expand All @@ -137,4 +171,5 @@ def get_title_page(imdb_id: str) -> TitlePage:
countries=imdb_movie.get("countries", []),
sound_mix=set(imdb_movie.get("sound mix", [])),
credits=build_name_credits_for_title_page(imdb_movie),
release_date=parse_release_date(imdb_movie),
)
152 changes: 0 additions & 152 deletions movielog/repository/json_titles.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,10 @@

import json
import os
from dataclasses import dataclass
from glob import glob
from pathlib import Path
from typing import Iterable, Optional, TypedDict, cast

import imdb
from slugify import slugify

from movielog.repository import slugifier
from movielog.utils import path_tools
Expand Down Expand Up @@ -83,155 +80,6 @@ def generate_title_slug(title: str, year: str) -> str:
return "{0}-{1}".format(slugifier.slugify_title(title), year)


def reset() -> None:
    """Rewrite every JSON title file so its sort title ends with the year.

    Each ``*.json`` file under ``FOLDER_NAME`` is loaded and rebuilt as a
    ``JsonTitle`` whose ``sortTitle`` is the stored sort title followed by
    the stored year in parentheses; all other fields are copied as-is. The
    file is overwritten in place unless the rebuilt record equals the stored
    one, and one progress line is logged per file either way.
    """
    title_paths = glob(os.path.join(FOLDER_NAME, "*.json"))
    total_count = len(title_paths)

    for position, title_path in enumerate(title_paths, start=1):
        with open(title_path, "r+") as title_file:
            stored = cast(JsonTitle, json.load(title_file))

            rebuilt = JsonTitle(
                imdbId=stored["imdbId"],
                slug=stored["slug"],
                title=stored["title"],
                originalTitle=stored["originalTitle"],
                sortTitle="{0} ({1})".format(stored["sortTitle"], stored["year"]),
                year=stored["year"],
                releaseDate=stored["releaseDate"],
                countries=stored["countries"],
                genres=stored["genres"],
                directors=stored["directors"],
                performers=stored["performers"],
                writers=stored["writers"],
            )

            unchanged = rebuilt == stored

            if not unchanged:
                # Overwrite from the start and drop any leftover tail of the
                # previous, possibly longer, content.
                title_file.seek(0)
                title_file.write(json.dumps(rebuilt, default=str, indent=2))
                title_file.truncate()

        if unchanged:
            message = "{}/{} No updates for {}."
        else:
            message = "{}/{} Updated {}."

        logger.log(message, position, total_count, title_path)


def fix_all() -> None:
    """Refresh every JSON title file from IMDb, resuming after interruptions.

    A ``.progress`` file inside ``FOLDER_NAME`` lists the paths handled by an
    earlier, interrupted run; those paths are skipped. For every other
    ``*.json`` file the stored title is loaded, the movie is fetched with
    ``imdb_http.get_movie`` (using the stored ``imdbId`` minus its first two
    characters -- presumably the "tt" prefix), a fresh ``JsonTitle`` is built
    from the fetched data, and the file is overwritten when the result
    differs from what was stored.

    If an error or Ctrl-C stops the run, the paths handled so far are
    appended to the progress file, the reason is logged, and the function
    returns without re-raising (best-effort, so the next call can resume).
    When the whole run finishes, the progress file is deleted.
    """
    processed_files = []
    # A set makes the per-file "already processed?" check constant time.
    already_processed = set()

    progress_file_path = os.path.join(FOLDER_NAME, ".progress")
    path_tools.ensure_file_path(progress_file_path)

    if os.path.isfile(progress_file_path):
        with open(progress_file_path, "r") as existing_progress_output_file:
            already_processed = set(
                existing_progress_output_file.read().splitlines()
            )

    try:
        json_files = glob(os.path.join(FOLDER_NAME, "*.json"))
        total_count = len(json_files)

        for index, file_path in enumerate(json_files):
            # Decide whether to skip before opening the file for writing.
            if file_path in already_processed:
                logger.log(
                    "{}/{} Skipped {} (already processed).",
                    index + 1,
                    total_count,
                    file_path,
                )
                continue

            with open(file_path, "r+") as json_file:
                json_title = cast(JsonTitle, json.load(json_file))
                imdb_movie = imdb_http.get_movie(json_title["imdbId"][2:])

                updated_title = JsonTitle(
                    imdbId=json_title["imdbId"],
                    slug=slugify(
                        "{0} ({1})".format(imdb_movie["title"], imdb_movie["year"]),
                        replacements=[("'", "")],
                    ),
                    title=imdb_movie["title"],
                    originalTitle=imdb_movie["original title"],
                    sortTitle=imdb_movie["canonical title"],
                    year=imdb_movie["year"],
                    releaseDate=parse_release_date(imdb_movie),
                    countries=imdb_movie["countries"],
                    genres=imdb_movie["genres"],
                    directors=[
                        JsonDirector(
                            imdbId="nm{0}".format(director.personID),
                            name=director["name"],
                            sequence=sequence,
                        )
                        for sequence, director in enumerate(imdb_movie["directors"])
                        if moviedata_api.valid_director_notes(director)
                    ],
                    performers=build_performers(imdb_movie),
                    writers=[
                        JsonWriter(
                            imdbId="nm{0}".format(writer.personID),
                            name=writer["name"],
                            sequence=sequence,
                            notes=None if writer.notes == "" else writer.notes,
                        )
                        for sequence, writer in enumerate(
                            imdb_movie.get("writers", [])
                        )
                        if writer.keys() and moviedata_api.valid_writer_notes(writer)
                    ],
                )

                if updated_title == json_title:
                    logger.log(
                        "{}/{} No updates for {}.",
                        index + 1,
                        total_count,
                        file_path,
                    )
                    processed_files.append(file_path)
                    continue

                # Overwrite in place and drop any leftover tail of the old
                # content.
                json_file.seek(0)
                json_file.write(json.dumps(updated_title, default=str, indent=2))
                json_file.truncate()

            logger.log(
                "{}/{} Updated {}.",
                index + 1,
                total_count,
                file_path,
            )

            processed_files.append(file_path)

    except (Exception, KeyboardInterrupt) as error:
        # Deliberately best-effort: save progress so the next run can resume,
        # including after Ctrl-C. SystemExit is no longer intercepted.
        with open(progress_file_path, "a") as progress_output_file:
            progress_output_file.writelines(
                filename + "\n" for filename in processed_files
            )

        # Report why the run stopped instead of failing silently.
        logger.log(
            "Stopped early ({}).",
            type(error).__name__,
        )
        logger.log(
            "Wrote {}.",
            progress_file_path,
        )
        return

    # Full pass completed: the resume marker is no longer needed.
    if os.path.isfile(progress_file_path):
        os.unlink(progress_file_path)


def generate_file_path(json_title: JsonTitle) -> str:
if not json_title["slug"]:
json_title["slug"] = generate_title_slug(
Expand Down
1 change: 1 addition & 0 deletions movielog/repository/title_data_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def update_json_title_with_db_data(json_title: json_titles.JsonTitle) -> None:
def update_json_title_with_title_page_data(json_title: json_titles.JsonTitle) -> None:
imdb_title_page = imdb_http.get_title_page(json_title["imdbId"])

json_title["releaseDate"] = imdb_title_page.release_date
json_title["countries"] = imdb_title_page.countries
json_title["genres"] = imdb_title_page.genres
json_title["directors"] = [
Expand Down

0 comments on commit 60bf8bc

Please sign in to comment.