You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

main.py 4.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. #!/usr/bin/env python3
  2. import os
  3. import sys
  4. import json
  5. import re
  6. import tempfile
  7. from urllib.parse import urlparse
  8. import requests
  9. from mastodon import Mastodon
  10. def load_config():
  11. with open(os.getenv('MR_CONFIG', 'config.json'), 'r') as f:
  12. return json.loads(f.read())
  13. def load_recents():
  14. try:
  15. with open(os.getenv('MR_RECENTS', 'recents.json'), 'r') as f:
  16. return json.loads(f.read())
  17. except NotFound:
  18. return []
  19. def save_recents(recents):
  20. with open(os.getenv('MR_RECENTS', 'recents.json'), 'w') as f:
  21. f.write(json.dumps(recents))
  22. def make_masto(config):
  23. baseurl = 'https://%s' % config['hostname']
  24. return Mastodon(access_token=config['token'], api_base_url=baseurl)
  25. def download_file(url, destdir):
  26. resp = requests.get(url)
  27. resp.raise_for_status()
  28. fname = None
  29. if 'content-disposition' in resp.headers:
  30. # Taken from: https://stackoverflow.com/questions/31804799/
  31. d = resp.headers['content-disposition']
  32. fname = re.findall("filename=(.+)", d)[0]
  33. else:
  34. u = urlparse(url)
  35. fname = u.path.rsplit('/', 1)[-1]
  36. destpath = os.path.join(destdir, fname)
  37. with open(destpath, 'wb') as f:
  38. f.write(resp.content)
  39. return destpath
  40. def make_post(masto, data):
  41. with tempfile.TemporaryDirectory(prefix='mastoreddit.', dir='/tmp') as td:
  42. os.makedirs(td, exist_ok=True)
  43. if len(data['media']) > 4:
  44. data['media'] = data['media'][:4]
  45. image_resps = None
  46. if data['media'] is not None and len(data['media']) > 0:
  47. image_resps = []
  48. for a in data['media']:
  49. dlpath = download_file(a, td)
  50. res = masto.media_post(dlpath)
  51. image_resps.append(res)
  52. body = '%s\n\nby /u/%s at https://old.reddit.com/%s' % (data['title'], data['username'], data['rid'])
  53. masto.status_post(body, media_ids=image_resps)
  54. def extract_media_urls(data):
  55. if 'selftext' in data:
  56. st = data['selftext']
  57. if st is not None and len(st) > 0:
  58. return []
  59. if 'post_hint' not in data:
  60. if 'gallery_data' in data:
  61. gd = data['gallery_data']['items']
  62. mmd = data['media_metadata']
  63. ents = []
  64. for ent in gd:
  65. mid = ent['media_id']
  66. mime = mmd[mid]['m']
  67. _, ext = mime.split('/')
  68. url = 'https://i.redd.it/%s.%s' % (mid, ext)
  69. ents.append(url)
  70. return ents
  71. else:
  72. raise ValueError('no hint type and missing gallery')
  73. hint = data['post_hint']
  74. if hint == 'link':
  75. if 'crosspost_parent_list' in data:
  76. return extract_media_urls(data['crosspost_parent_list'][0])
  77. else:
  78. # Not a reddit crosspost.
  79. return []
  80. if hint == 'hosted:video':
  81. return [data['secure_media']['reddit_video']['fallback_url']]
  82. if hint == 'image':
  83. return [data['url']]
  84. if hint == 'rich:video':
  85. # TODO maybe use thumbnail?
  86. print('got a video at', data['url'], 'but ignoring')
  87. raise ValueError('unknown hint type ' + hint)
  88. def process_post(post):
  89. d = post['data']
  90. media = []
  91. try:
  92. media = extract_media_urls(d)
  93. except Exception as e:
  94. print('error processing ' + d['id'] + ': ' + str(e))
  95. return {
  96. 'rid': d['id'],
  97. 'title': d['title'],
  98. 'username': d['author'],
  99. 'score': d['ups'],
  100. 'media': media
  101. }
  102. def query_tops(config):
  103. url = 'https://www.reddit.com/r/%s/top.json?sort=top&t=day' % config['subreddit']
  104. resp = requests.get(url, headers = {'User-agent': 'mastoreddit'})
  105. resp.raise_for_status()
  106. j = resp.json()
  107. posts = []
  108. for post in j['data']['children']:
  109. if post['kind'] != 't3':
  110. print('found post of kind', post['kind'])
  111. p = process_post(post)
  112. lt = p['title'].lower()
  113. if any(f in lt for f in config['skip_titles']):
  114. continue
  115. posts.append(p)
  116. return posts
  117. def filter_posts(config, posts, filt):
  118. tp = config['top_posts']
  119. ok = []
  120. filt = set(filt)
  121. newfilt = set()
  122. for p in posts:
  123. if len(ok) <= tp and p['score'] >= config['min_score']:
  124. if p['rid'] not in filt:
  125. ok.append(p)
  126. newfilt.add(p['rid'])
  127. elif p['rid'] in filt:
  128. newfilt.add(p['rid'])
  129. return ok, list(newfilt)
  130. def just_toot(config, msg):
  131. masto = make_masto(config)
  132. masto.toot(msg)
  133. def main():
  134. config = load_config()
  135. masto = make_masto(config)
  136. recents = load_recents()
  137. res = query_tops(config)
  138. okps, newrecents = filter_posts(config, res, recents)
  139. for p in okps:
  140. print('posting', p['rid'], '-', p['title'])
  141. make_post(masto, p)
  142. save_recents(newrecents)
  143. if __name__ == '__main__':
  144. main()