Update
This commit is contained in:
parent
d30baa49f6
commit
d53dcaeb54
2 changed files with 16 additions and 14 deletions
|
@ -14,3 +14,7 @@ feeds:
|
||||||
- name: "novinky.cz"
|
- name: "novinky.cz"
|
||||||
rss_source: "https://www.novinky.cz/rss"
|
rss_source: "https://www.novinky.cz/rss"
|
||||||
unique_tag: "guid"
|
unique_tag: "guid"
|
||||||
|
|
||||||
|
- name: "CTK"
|
||||||
|
rss_source: "https://www.ceskenoviny.cz/sluzby/rss/zpravy.php"
|
||||||
|
unique_tag: "guid"
|
26
headline.py
26
headline.py
|
@ -8,7 +8,6 @@ import json
|
||||||
import imgkit
|
import imgkit
|
||||||
|
|
||||||
from diff_match_patch import diff_match_patch
|
from diff_match_patch import diff_match_patch
|
||||||
import difflib
|
|
||||||
|
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
import hashlib
|
import hashlib
|
||||||
|
@ -29,21 +28,20 @@ dmp = diff_match_patch()
|
||||||
|
|
||||||
rc = redis.Redis(host='localhost', port=6379, db=0)
|
rc = redis.Redis(host='localhost', port=6379, db=0)
|
||||||
|
|
||||||
image_options = {
|
article_count = 0
|
||||||
'width': '450',
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def write_article(article, rc):
|
def write_article(article, rc):
|
||||||
rval = json.dumps(article['content'])
|
rval = json.dumps(article['content'])
|
||||||
rc.set(article['rss_id'], rval)
|
rc.set(article['rss_id'], rval)
|
||||||
|
|
||||||
|
def process_diff(old, new, rss_id):
|
||||||
def process_diff(diff, article):
|
diff = dmp.diff_main(old['title'], new['title'])
|
||||||
dmp.diff_cleanupSemantic(diff)
|
dmp.diff_cleanupSemantic(diff)
|
||||||
html_diff = dmp.diff_prettyHtml(diff)
|
html_diff = dmp.diff_prettyHtml(diff)
|
||||||
filename = hashlib.md5(article['rss_id'].encode()).hexdigest() + ".jpg"
|
print(old['link'])
|
||||||
image = imgkit.from_string(html_diff, filename, options = {'width': '450'})
|
#print(diff)
|
||||||
|
#filename = "./img/" + hashlib.md5(rss_id.encode()).hexdigest() + ".jpg"
|
||||||
|
#image = imgkit.from_string(html_diff, filename, options = {'width': '450', 'quiet': ''})
|
||||||
return(True)
|
return(True)
|
||||||
|
|
||||||
|
|
||||||
|
@ -52,10 +50,9 @@ def process_item(article, rc):
|
||||||
old = json.loads(rc.get(article['rss_id']))
|
old = json.loads(rc.get(article['rss_id']))
|
||||||
new = article['content']
|
new = article['content']
|
||||||
if old['title'] != new['title']:
|
if old['title'] != new['title']:
|
||||||
print('Article changed. Fuck the world.')
|
print('Article changed. World is fucked.')
|
||||||
diff = dmp.diff_main(old['title'], new['title'])
|
diff = process_diff(old, new, article['rss_id'])
|
||||||
process_diff(diff, article)
|
#write_article(article, rc)
|
||||||
#write_article(article_rc)
|
|
||||||
return(True)
|
return(True)
|
||||||
else:
|
else:
|
||||||
# Article is the same. All good!
|
# Article is the same. All good!
|
||||||
|
@ -66,7 +63,7 @@ def process_item(article, rc):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
article_count = 0
|
|
||||||
for feed in config['feeds']:
|
for feed in config['feeds']:
|
||||||
rss_source = str(feed['rss_source'])
|
rss_source = str(feed['rss_source'])
|
||||||
unique_tag = str(feed['unique_tag'])
|
unique_tag = str(feed['unique_tag'])
|
||||||
|
@ -93,5 +90,6 @@ for feed in config['feeds']:
|
||||||
}
|
}
|
||||||
article_count += 1
|
article_count += 1
|
||||||
process_item(article, rc)
|
process_item(article, rc)
|
||||||
|
|
||||||
print("Processed articles:")
|
print("Processed articles:")
|
||||||
print(article_count)
|
print(article_count)
|
Loading…
Reference in a new issue