forked from xuv/NYTdiff
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Moved html to template and prevented multiple browsers * Changed image diff to be a class * Added script * Added validator infrastructure * Fixed script * Added tweet validator support and content_validator.py * Fixed CR comments
- Loading branch information
Showing
9 changed files
with
187 additions
and
93 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,6 +40,7 @@ img { | |
vertical-align: middle; | ||
margin-right: -5px; | ||
margin-bottom: 5px; | ||
width: 30px; | ||
} | ||
|
||
.alignleft { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
from pytz import timezone | ||
|
||
import feedparser | ||
|
||
from validators import html_validator | ||
from israel_hayom_parser import IsraelHayomParser as Parser | ||
|
||
TIMEZONE = 'Israel' | ||
LOCAL_TZ = timezone(TIMEZONE) | ||
|
||
|
||
def main():
    """
    Run the HTML validator over every entry of the parser's RSS feed.

    Builds the parser for LOCAL_TZ, fetches parser.url with feedparser and,
    for each entry in reverse feed order, converts it with
    parser.entry_to_dict and passes its "title" and "abstract" to
    html_validator.validate_change together with the entry "url" and an
    empty old value. A message is printed for every text the validator
    rejects (returns a falsy value for).
    """
    parser = Parser(LOCAL_TZ)
    r = feedparser.parse(parser.url)

    # feedparser.parse() hands back a result object even when the download or
    # parse fails, so an empty feed shows up as an empty entries list rather
    # than as None. The None test is kept only as a defensive guard.
    if r is None or not r.entries:
        print("RSS was empty")
        return

    print(f"Checking {len(r.entries)} entries")
    # NOTE(review): reverse order presumably walks oldest-first; confirm
    # against the feed's ordering.
    for entry in reversed(r.entries):
        entry_dict = parser.entry_to_dict(entry)
        url = entry_dict["url"]
        title = entry_dict["title"]
        description = entry_dict["abstract"]

        if not html_validator.validate_change(url, "", title):
            print(f"Could not find title \n{title} \nin {url}")
        if not html_validator.validate_change(url, "", description):
            print(f"Could not find description \n{description}\nin {url}")

    print("Finished iterating")
|
||
|
||
if __name__ == '__main__': | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
<!doctype html> | ||
<html lang="en"> | ||
<head> | ||
<meta charset="utf-8"> | ||
<link rel="stylesheet" href="./css/styles.css"> | ||
</head> | ||
|
||
<!-- Page body is fixed at 500px wide. NOTE(review): presumably this page is rendered to an image by the diff script; confirm against the caller. -->
<body style="width: 500px;"> | ||
<div id="wrapper"> | ||
<!-- NOTE(review): the bare words inside the next div and paragraph look like placeholders that the calling script substitutes; TODO confirm. -->
<div> | ||
text_to_tweet: | ||
</div> | ||
<p> | ||
diff_html | ||
</p> | ||
<!-- Footer row: Twitter icon and account handle, followed by a Hebrew tagline in a span with class alignright. -->
<div> | ||
<p class="alignleft"> | ||
<img src="img/twitter.png"/> | ||
@ILNewsDiff | ||
<span class="alignright"> | ||
כותרת בשינוי אדרת | ||
</span> | ||
</p> | ||
</div> | ||
</div> | ||
</body> | ||
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import math | ||
import re | ||
|
||
|
||
def one_char_difference(first: str, second: str):
    """
    Tell whether first and second are at most one single-character edit apart.

    An edit is a substitution, an insertion or a deletion of one character.
    Returns True for identical strings and for strings one edit apart,
    False otherwise.
    """
    # Lengths more than one apart can never be bridged by a single edit.
    if abs(len(first) - len(second)) > 1:
        return False

    # Index of the first position at which the two strings disagree, if any.
    pos = next(
        (idx for idx, (a, b) in enumerate(zip(first, second)) if a != b),
        None,
    )
    if pos is None:
        # The shorter string is a prefix of the longer one (or they are equal).
        return True

    rest_first = first[pos + 1:]
    rest_second = second[pos + 1:]
    return any((
        rest_first == rest_second,    # one character substituted
        first[pos:] == rest_second,   # second has one extra character here
        rest_first == second[pos:],   # first has one extra character here
    ))
|
||
|
||
ALPHABET_WITH_HEBREW_PATTERN = r"[^a-zA-Zא-ת0-9]" | ||
|
||
|
||
def validate_change(url: str, old: str, new: str):
    """
    Decide whether the change from old to new is more than a trivial edit.

    Returns True when exactly one of the two texts contains '?'. Otherwise
    every character matched by ALPHABET_WITH_HEBREW_PATTERN (anything that is
    not a Latin letter, a Hebrew letter or a digit) is removed from both
    texts, and the result is True only if the remaining texts are more than
    one single-character edit apart. The url argument is not used by this
    validator.
    """
    # A question mark that appears or disappears always counts as a change.
    if ("?" in old) != ("?" in new):
        return True

    stripped_old, stripped_new = (
        re.sub(ALPHABET_WITH_HEBREW_PATTERN, '', text) for text in (old, new)
    )

    if one_char_difference(stripped_old, stripped_new):
        return False
    return True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters