You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

main.py 5.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. #!/usr/bin/env python3
  2. import os
  3. import sys
  4. import json
  5. import re
  6. import tempfile
  7. from urllib.parse import urlparse
  8. import requests
  9. from mastodon import Mastodon
  10. MAX_SIZE = 39000000
  11. def load_config():
  12. with open(os.getenv('MR_CONFIG', 'config.json'), 'r') as f:
  13. return json.loads(f.read())
  14. def load_recents():
  15. try:
  16. with open(os.getenv('MR_RECENTS', 'recents.json'), 'r') as f:
  17. return json.loads(f.read())
  18. except NotFound:
  19. return []
  20. def save_recents(recents):
  21. with open(os.getenv('MR_RECENTS', 'recents.json'), 'w') as f:
  22. f.write(json.dumps(recents))
  23. def make_masto(config):
  24. baseurl = 'https://%s' % config['hostname']
  25. return Mastodon(access_token=config['token'], api_base_url=baseurl)
  26. def download_file(url, destdir):
  27. resp = requests.get(url)
  28. resp.raise_for_status()
  29. fname = None
  30. if 'content-disposition' in resp.headers:
  31. # Taken from: https://stackoverflow.com/questions/31804799/
  32. d = resp.headers['content-disposition']
  33. fname = re.findall("filename=(.+)", d)[0]
  34. else:
  35. u = urlparse(url)
  36. fname = u.path.rsplit('/', 1)[-1]
  37. destpath = os.path.join(destdir, fname)
  38. with open(destpath, 'wb') as f:
  39. f.write(resp.content)
  40. return destpath
  41. def make_post(masto, data):
  42. with tempfile.TemporaryDirectory(prefix='mastoreddit.', dir='/tmp') as td:
  43. os.makedirs(td, exist_ok=True)
  44. if len(data['media']) > 4:
  45. data['media'] = data['media'][:4]
  46. image_resps = None
  47. if data['media'] is not None and len(data['media']) > 0:
  48. image_resps = []
  49. for a in data['media']:
  50. dlpath = download_file(a, td)
  51. fsize = os.path.getsize(dlpath)
  52. if fsize > MAX_SIZE:
  53. print('ignoring file (too big):', dlpath, fsize, 'bytes')
  54. continue
  55. res = masto.media_post(dlpath)
  56. image_resps.append(res)
  57. body = '%s\n\nby /u/%s at https://old.reddit.com/%s' % (data['title'], data['username'], data['rid'])
  58. masto.status_post(body, media_ids=image_resps)
  59. def extract_media_urls(data):
  60. if 'selftext' in data:
  61. st = data['selftext']
  62. if st is not None and len(st) > 0:
  63. return []
  64. if 'post_hint' not in data:
  65. if 'gallery_data' in data:
  66. gd = data['gallery_data']['items']
  67. mmd = data['media_metadata']
  68. ents = []
  69. for ent in gd:
  70. mid = ent['media_id']
  71. mime = mmd[mid]['m']
  72. _, ext = mime.split('/')
  73. url = 'https://i.redd.it/%s.%s' % (mid, ext)
  74. ents.append(url)
  75. return ents
  76. else:
  77. raise ValueError('no hint type and missing gallery')
  78. hint = data['post_hint']
  79. if hint == 'link':
  80. if 'crosspost_parent_list' in data:
  81. return extract_media_urls(data['crosspost_parent_list'][0])
  82. else:
  83. # Not a reddit crosspost.
  84. return []
  85. if hint == 'hosted:video':
  86. return [data['secure_media']['reddit_video']['fallback_url']]
  87. if hint == 'image':
  88. return [data['url']]
  89. if hint == 'rich:video':
  90. # TODO maybe use thumbnail?
  91. print('got a video at', data['url'], 'but ignoring')
  92. raise ValueError('unknown hint type ' + hint)
  93. def process_post(post):
  94. d = post['data']
  95. media = []
  96. try:
  97. media = extract_media_urls(d)
  98. except Exception as e:
  99. print('error processing ' + d['id'] + ': ' + str(e))
  100. return {
  101. 'rid': d['id'],
  102. 'title': d['title'],
  103. 'username': d['author'],
  104. 'score': d['ups'],
  105. 'media': media
  106. }
  107. def query_tops(config):
  108. url = 'https://www.reddit.com/r/%s/hot.json' % config['subreddit']
  109. resp = requests.get(url, headers = {'User-agent': 'mastoreddit'})
  110. resp.raise_for_status()
  111. j = resp.json()
  112. posts = []
  113. for post in j['data']['children']:
  114. if post['kind'] != 't3':
  115. print('found post of kind', post['kind'])
  116. p = process_post(post)
  117. lt = p['title'].lower()
  118. if any(f in lt for f in config['skip_titles']):
  119. continue
  120. posts.append(p)
  121. return posts
  122. def filter_posts(config, posts, filt):
  123. tp = config['top_posts']
  124. ok = []
  125. filt = set(filt)
  126. newfilt = set()
  127. for i, p in enumerate(posts):
  128. if i < tp and p['score'] >= config['min_score']:
  129. if p['rid'] not in filt:
  130. ok.append(p)
  131. else:
  132. print('filtered out:', p['rid'], p['title'])
  133. newfilt.add(p['rid'])
  134. continue
  135. if p['rid'] in filt:
  136. newfilt.add(p['rid'])
  137. continue
  138. print('ignoring for now:', p['rid'], p['title'])
  139. return ok, list(newfilt)
  140. def just_toot(config, msg):
  141. masto = make_masto(config)
  142. masto.toot(msg)
  143. def main():
  144. config = load_config()
  145. masto = make_masto(config)
  146. recents = load_recents()
  147. res = query_tops(config)
  148. okps, newrecents = filter_posts(config, res, recents)
  149. for p in okps:
  150. print('posting', p['rid'], '-', p['title'])
  151. try:
  152. make_post(masto, p)
  153. except Exception as e:
  154. print('error posting:', p['rid'], str(e))
  155. save_recents(newrecents)
  156. if __name__ == '__main__':
  157. main()