You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

searchwkr.py 1.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. #!/usr/bin/env python3
  2. import os
  3. import sys
  4. import json
  5. from datetime import datetime
  6. import requests
  7. import inventory
  8. import searchlib
  9. import sbenv
  10. def main(argv):
  11. # Setup
  12. searchlib.prep_nltk()
  13. # Actually do the query.
  14. today = datetime.now()
  15. recents_before = load_recents()
  16. res = searchlib.query_day(today, preloadurls=recents_before)
  17. print(json.dumps(res, indent=' '))
  18. # Save the urls we've seen now.
  19. recents_before['urls'].extend(map(lambda e: e['u'], res))
  20. save_recents(recents_before)
  21. # Now send them up.
  22. ok = True
  23. for e in res:
  24. print('uploading', e['u'], '...')
  25. try:
  26. send_item(today, e)
  27. except Exception as e:
  28. print('failed!', str(e))
  29. ok = False
  30. return 0 if ok else 1
  31. def send_item(day, ent):
  32. body = {
  33. 'date': day.strftime(inventory.DATE_FORMAT),
  34. 'desc': ent
  35. }
  36. h = {
  37. 'Authorization': 'Bearer %s' % sbenv.get_admin_key(),
  38. }
  39. surl = sbenv.get_submit_url()
  40. r = requests.post(surl, headers=h, data=json.dumps(body))
  41. r.raise_for_status()
  42. def get_recents_path():
  43. return os.path.join(sbenv.get_worker_datadir(), 'recents.json')
  44. # Loads the recents file, creating a new one if it's from a different day.
  45. def load_recents():
  46. cdate = datetime.now().strftime(inventory.DATE_FORMAT)
  47. rp = get_recents_path()
  48. if not os.path.exists(rp):
  49. return {
  50. 'cdate': cdate,
  51. 'urls': []
  52. }
  53. with open(rp, 'r') as f:
  54. j = json.load(f)
  55. if j['cdate'] != cdate:
  56. return {
  57. 'cdate': cdate,
  58. 'urls': []
  59. }
  60. else:
  61. return j
  62. # Saves the recents file.
  63. def save_recents(recents):
  64. with open(get_recents_path(), 'w') as f:
  65. json.dump(recents, f, indent=' ')
  66. if __name__ == '__main__':
  67. sys.exit(main(sys.argv))