You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

searchwkr.py 1.9KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. #!/usr/bin/env python3
  2. import os
  3. import sys
  4. import json
  5. from datetime import datetime
  6. import inventory
  7. import searchlib
  8. import sbenv
  9. def main(argv):
  10. # Setup
  11. searchlib.prep_nltk()
  12. # Actually do the query.
  13. today = datetime.now()
  14. recents_before = load_recents()
  15. res = searchlib.query_day(today, preloadurls=recents_before)
  16. print(json.dumps(res, indent=' '))
  17. # Save the urls we've seen now.
  18. recents_before['urls'].append(list(map(lambda e: e['u'], res)))
  19. save_recents(recents_before)
  20. # Now send them up.
  21. ok = True
  22. for e in res:
  23. print('uploading', e['u'], '...')
  24. try:
  25. send_item(today, e)
  26. except Exception as e:
  27. print('failed!', str(e))
  28. ok = False
  29. return 0 if ok else 1
  30. def send_item(day, ent):
  31. body = {
  32. 'date': day.strftime(inventory.DATE_FORMAT),
  33. 'desc': ent
  34. }
  35. h = {
  36. 'Authorization': 'Bearer %s' % sbenv.get_admin_key(),
  37. }
  38. surl = sbenv.get_submit_url()
  39. r = requests.post(surl, headers=h, data=json.dumps(body))
  40. r.raise_for_status()
  41. def get_recents_path():
  42. return os.path.join(sbenv.get_worker_datadir(), 'recents.json')
  43. # Loads the recents file, creating a new one if it's from a different day.
  44. def load_recents():
  45. cdate = datetime.now().strftime(inventory.DATE_FORMAT)
  46. rp = get_recents_path()
  47. if not os.path.exists(rp):
  48. return {
  49. 'cdate': cdate,
  50. 'urls': []
  51. }
  52. with open(rp, 'r') as f:
  53. j = json.load(f)
  54. if j['cdate'] != cdate:
  55. return {
  56. 'cdate': cdate,
  57. 'urls': []
  58. }
  59. else:
  60. return j
  61. # Saves the recents file.
  62. def save_recents(recents):
  63. with open(get_recents_path(), 'w') as f:
  64. json.dump(recents, f, indent=' ')
  65. if __name__ == '__main__':
  66. sys.exit(main(sys.argv))