forked from malakazzz0/urls-mthrfckr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathurl2md.py
65 lines (44 loc) · 1.86 KB
/
url2md.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import requests
import re
from bs4 import BeautifulSoup
from datetime import datetime
def fetch_description_and_title(url):
"""Fetch the description and title of a given URL."""
try:
response = requests.get(url)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')
description_tag = soup.find('meta', attrs={'name': 'description'}) or \
soup.find('meta', attrs={'property': 'og:description'})
title_tag = soup.find('title')
description = "Description not available"
title = "Unknown"
if description_tag and 'content' in description_tag.attrs:
description = description_tag['content']
if title_tag:
title = title_tag.string
return url, title, description
except requests.exceptions.RequestException as e:
return url, "Error", f"Error: {e}"
except AttributeError as e:
return url, "Error", f"Error: {e}"
def main():
urls_input = input("Enter the URLs (can be plain or in markdown format): ")
url_patterns = re.findall(
r'(?:\[.*?\]\()?(https?://\S+)(?:\))?', urls_input)
# Get the current date in YYYY-MM-DD format
current_date = datetime.now().strftime('%Y-%m-%d')
link_data = []
for url in url_patterns:
url = url.strip()
url, title, description = fetch_description_and_title(url)
link_data.append((url, title, description))
# Sort the data based on the title
link_data.sort(key=lambda x: x[1])
with open('link_descriptions.md', 'w', encoding='utf-8') as f:
# Write the current date as an h3 heading at the beginning of the markdown file
f.write(f"### {current_date}\n\n")
for url, title, description in link_data:
f.write(f"- [{title}]({url}) - {description}\n")
if __name__ == "__main__":
main()