Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2.2 fixes #11

Merged
merged 8 commits into from
Aug 17, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 47 additions & 23 deletions medg.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#!/Users/litlnemo/aProjects/coding/med/myenv/bin/python

import PySimpleGUI as sg
import os
import requests
from bs4 import BeautifulSoup
import webbrowser
import re
from tkinter import filedialog

"""med.py: A script for searching the Middle English Dictionary,
"""
med.py: A script for searching the Middle English Dictionary,
designed for use by those researching Medieval names.

v. 1.0.1, 14 September 2013
Expand Down Expand Up @@ -36,8 +36,15 @@
v. 2.2G 12 Aug 2024
- different searches added -- "definition and notes" and "modern English word equivalent"
- from this point version numbers are intended to match the standalone app

v. 2.2.3 17 Aug 2024
- fixed styling issues on the HTML results page
- users can now choose the folder where the results page is saved
"""

# Ask the user which folder the results page should be saved in.
import sys

# Tk documents the "message" option of the directory chooser as macOS-only,
# so it is passed only there; every platform still gets the window title.
_chooser_options = {"title": "Choose a folder for your results"}
if sys.platform == "darwin":
    _chooser_options["message"] = "Choose a folder for your results"

# askdirectory() returns an empty value when the dialog is cancelled. Fall
# back to ~/Documents so the results path never collapses to
# "/medresults.html" at the filesystem root.
dirname = filedialog.askdirectory(**_chooser_options) or os.path.expanduser("~/Documents")

# Search window contents
sg.theme('SystemDefaultForReal')
layout = [ [sg.Text('Enter search word: '), sg.InputText(key='searchword')],
Expand Down Expand Up @@ -90,53 +97,72 @@

# Open file to write to, write HTML for top of page

file_path = os.path.expanduser("~/Documents/medresults.html")
file_path = os.path.expanduser(f"{dirname}/medresults.html")
with open(file_path, "w", encoding="utf-8") as file:
file.write(
"<html>"
+ "\n"
+ '<head><title>Middle English Dictionary open search results</title><base href="http://quod.lib.umich.edu" /><meta charset="UTF-8"></head>'
+ '<head><title>Middle English Dictionary open search results</title><base href="http://quod.lib.umich.edu" /><meta charset="UTF-8">'
+ "\n"
+ '<body style="background-color: #fdf0b4; font-family: helvetica, arial, sans-serif; margin: 10px 20px 10px 20px;">'
+ '<style>li:nth-of-type(odd) { background-color: #fdf0b4; }</style>'
+ "\n"
+ '</head>'
+ "\n"
+ '<body style="font-family: helvetica, arial, sans-serif; margin:0 auto; max-width: 700px; " >'
+ "\n"
)
file.write(
'<h1>Middle English Dictionary open search results for the word "'
'<h1 style="text-align: center;" width="100%">Middle English Dictionary open search results for the word <span style="font-style: italic">"'
+ str(search_term)
+ '":</h1>'
+ '"</span>:</h1>'
+ "\n"
)
file.write("<ul>" + "\n")
file.write("<ol>" + "\n")

# Find relevant links, build LIs with them

get_urls = soup.find_all("h3", class_="index_title document-title-heading col-md-12" )
get_urls = soup.find_all("h3", class_="index_title document-title-heading col-md-12")

if not get_urls:

print("Error! No results.")
file.write("<li>" + "\n")
file.write("Sorry, no results!" + "\n")
file.write("</li>" + "\n")

for h3_tag in get_urls:
    # Find the <a> tag inside the <h3>; it is the element written to the page.
    link_tag = h3_tag.find("a")

    # If there is no <a> tag, there is nothing to list for this heading.
    if not link_tag:
        continue

    # Remove the "document-counter" span BEFORE unwrapping: unwrap() moves the
    # heading's children out of it, so searching the <h3> afterwards would
    # never find the span.
    span_counter = h3_tag.find("span", class_="document-counter")
    if span_counter:
        span_counter.decompose()

    # Remove the h3 tag but keep its contents (specifically the <a> tag).
    h3_tag.unwrap()

    file.write('<li style="padding: 10px 0 10px 5px"><b>' + "\n")

    # Find the entry's index number in the link. .get() avoids a KeyError on
    # an anchor that has no href attribute; such a link is written unchanged.
    index_number = re.search(r"MED\d\d\d\d\d", link_tag.get("href", ""))

    if index_number:
        full_url = "https://quod.lib.umich.edu/m/middle-english-dictionary/dictionary/" + index_number.group(0)
        link_tag["href"] = full_url

    file.write(str(link_tag))
    # Close <b> before <li> so the tags nest in the order they were opened.
    file.write("</b></li>" + "\n")
    print("Added " + str(count + 1) + " to page")
    count += 1

# Finish HTML and close 'er up!

file.write(
"</ul>"
"</ol>"
+ "\n"
+ '<p style="font-size: small; border-top: 1px solid black; margin-top: 10px; padding-top: 15px;">Page generated by MED Open Search. Software &copy; Wendi Dunlap, 2013, 2024. The MED and all search results &copy; University of Michigan. </p></body>'
+ "\n"
Expand All @@ -145,6 +171,4 @@

# Open the generated results page in the default browser.
from pathlib import Path

# as_uri() builds a correctly percent-encoded file:// URL, so folders whose
# names contain spaces or non-ASCII characters still open properly.
# resolve() makes the path absolute, which as_uri() requires.
html_url = Path(file_path).resolve().as_uri()

webbrowser.open_new(html_url)