This commit is contained in:
Matěj Divecký 2022-07-09 20:50:33 +02:00
parent d30baa49f6
commit d53dcaeb54
2 changed files with 16 additions and 14 deletions

View file

@ -14,3 +14,7 @@ feeds:
- name: "novinky.cz" - name: "novinky.cz"
rss_source: "https://www.novinky.cz/rss" rss_source: "https://www.novinky.cz/rss"
unique_tag: "guid" unique_tag: "guid"
- name: "CTK"
rss_source: "https://www.ceskenoviny.cz/sluzby/rss/zpravy.php"
unique_tag: "guid"

View file

@ -8,7 +8,6 @@ import json
import imgkit import imgkit
from diff_match_patch import diff_match_patch from diff_match_patch import diff_match_patch
import difflib
from pprint import pprint from pprint import pprint
import hashlib import hashlib
@ -29,21 +28,20 @@ dmp = diff_match_patch()
rc = redis.Redis(host='localhost', port=6379, db=0) rc = redis.Redis(host='localhost', port=6379, db=0)
image_options = { article_count = 0
'width': '450',
}
def write_article(article, rc): def write_article(article, rc):
rval = json.dumps(article['content']) rval = json.dumps(article['content'])
rc.set(article['rss_id'], rval) rc.set(article['rss_id'], rval)
def process_diff(old, new, rss_id):
def process_diff(diff, article): diff = dmp.diff_main(old['title'], new['title'])
dmp.diff_cleanupSemantic(diff) dmp.diff_cleanupSemantic(diff)
html_diff = dmp.diff_prettyHtml(diff) html_diff = dmp.diff_prettyHtml(diff)
filename = hashlib.md5(article['rss_id'].encode()).hexdigest() + ".jpg" print(old['link'])
image = imgkit.from_string(html_diff, filename, options = {'width': '450'}) #print(diff)
#filename = "./img/" + hashlib.md5(rss_id.encode()).hexdigest() + ".jpg"
#image = imgkit.from_string(html_diff, filename, options = {'width': '450', 'quiet': ''})
return(True) return(True)
@ -52,10 +50,9 @@ def process_item(article, rc):
old = json.loads(rc.get(article['rss_id'])) old = json.loads(rc.get(article['rss_id']))
new = article['content'] new = article['content']
if old['title'] != new['title']: if old['title'] != new['title']:
print('Article changed. Fuck the world.') print('Article changed. World is fucked.')
diff = dmp.diff_main(old['title'], new['title']) diff = process_diff(old, new, article['rss_id'])
process_diff(diff, article) #write_article(article, rc)
#write_article(article_rc)
return(True) return(True)
else: else:
# Article is the same. All good! # Article is the same. All good!
@ -66,7 +63,7 @@ def process_item(article, rc):
article_count = 0
for feed in config['feeds']: for feed in config['feeds']:
rss_source = str(feed['rss_source']) rss_source = str(feed['rss_source'])
unique_tag = str(feed['unique_tag']) unique_tag = str(feed['unique_tag'])
@ -93,5 +90,6 @@ for feed in config['feeds']:
} }
article_count += 1 article_count += 1
process_item(article, rc) process_item(article, rc)
print("Processed articles:") print("Processed articles:")
print(article_count) print(article_count)