Преглед изворни кода

Added submission functionality.

master
Trey Del Bonis пре 2 година
родитељ
комит
76e02162fc
3 измењених фајлова са 100 додато и 34 уклоњено
  1. 24
    0
      app.py
  2. 62
    32
      searchlib.py
  3. 14
    2
      templates/main.htm

+ 24
- 0
app.py Прегледај датотеку



return make_html_redirect_response('/') return make_html_redirect_response('/')


@app.post('/action/submit')
async def handle_submit(req: Request, article: str = Form(...)):
    """Handle a user-submitted article URL from the submission form.

    The URL is fetched via ``searchlib.fetch_article`` and then run through
    ``searchlib.process_day_results``.  Only when the processed result has a
    non-empty ``'pass'`` entry is the article stored and a report sent.

    Every outcome (fetch failure, no match, success) responds with a redirect
    back to ``/``.
    """
    # Starlette leaves ``req.client`` as None when the peer address is not
    # available, so don't assume it is always populated.
    ipaddr = req.client.host if req.client is not None else 'unknown'

    today_str = datetime.now().strftime(inventory.DATE_FORMAT)

    # NOTE(review): fetch_article appears to do a synchronous download inside
    # this async handler -- confirm whether blocking the event loop is OK here.
    fetched_art = searchlib.fetch_article(article)
    if fetched_art is None:
        return make_html_redirect_response('/')

    # Prefer the article's own publish date; fall back to today when the
    # fetcher could not determine one.
    eff_date = fetched_art.get('nd', today_str)

    # Now process it so we can tell that it's a definite match.
    proced_art = searchlib.process_day_results(eff_date, [fetched_art])
    print(proced_art)
    if not proced_art['pass']:
        return make_html_redirect_response('/')

    # If it all looks good then store it and report it.
    await add_article(eff_date, fetched_art)
    await reporthook.send_report('address %s submitted good-looking article %s' % (ipaddr, article))

    return make_html_redirect_response('/')

################################ ################################
# API endpoints # API endpoints
################################ ################################

+ 62
- 32
searchlib.py Прегледај датотеку

for i in range(numdays): for i in range(numdays):
d = startdate + (oneday * i) d = startdate + (oneday * i)
print(d) print(d)
qresults = query_for_date(d, cookiejar)
dres = _query_day_and_fetch(d, cookiejar, seenurls)
dateurls[d.strftime('%Y-%m-%d')] = dres


dayresults = []
for rurl, rtitle in qresults:
if rurl in seenurls:
continue
seenurls.add(rurl)
return dateurls
def query_day(date, preloadurls=None):
    """Query and fetch the results for a single date.

    ``preloadurls`` is an optional iterable of URLs to treat as already seen;
    it is copied into a fresh set, so the caller's collection is not mutated.
    Returns whatever ``_query_day_and_fetch`` returns for that date.
    """
    jar = load_cookiejar()

    if preloadurls is None:
        already_seen = set()
    else:
        already_seen = set(preloadurls)

    return _query_day_and_fetch(date, jar, already_seen)


art = None
try:
u = urllib.parse.urlparse(rurl)
if (u.path == '/' or u.path == '') and u.params == '':
print('url is for website main page and has no params, probably not a news article:', rurl)
continue
def _query_day_and_fetch(date, cookiejar, seenurls_mut):
    """Run the search query for ``date`` and fetch each new result.

    Parameters:
        date: the day to query, passed straight to ``query_for_date``.
        cookiejar: cookie jar handed to ``query_for_date`` and
            ``_fetch_article``.
        seenurls_mut: set of URLs already handled.  It is MUTATED: every URL
            processed here is added, so callers can share it across days to
            avoid duplicates.

    Returns a list of result dicts.  Each has ``'u'`` (URL) and ``'t'``
    (search result title), plus whatever keys ``_fetch_article`` produced
    for that URL.
    """
    # Use the ``date`` parameter; the previous body referenced an undefined
    # name ``d`` here.
    qresults = query_for_date(date, cookiejar)

    dayresults = []
    for rurl, rtitle in qresults:
        # Check the set we were actually given (was an undefined ``seenurls``).
        if rurl in seenurls_mut:
            continue
        seenurls_mut.add(rurl)

        rent = {
            'u': rurl,
            't': rtitle,
        }

        # A None result means the URL was rejected by _fetch_article; the
        # bare search entry is still recorded in that case.
        fa = _fetch_article(rurl, cookiejar)
        if fa is not None:
            rent.update(fa)

        dayresults.append(rent)

    return dayresults


return dateurls
def fetch_article(url):
    """Fetch a single article by URL and return its result dict.

    Returns ``None`` when ``_fetch_article`` rejects the URL or when no title
    could be extracted.  Otherwise returns the fetched dict with ``'u'`` set
    to ``url`` and ``'t'`` set to the extracted title (``'nt'``).
    """
    cj = load_cookiejar()

    fa = _fetch_article(url, cj)
    # _fetch_article swallows download/parse errors and may hand back an
    # empty or partial dict; without 'nt' there is no title to use, and
    # indexing it would raise KeyError.
    if fa is None or 'nt' not in fa:
        return None

    fa['u'] = url
    fa['t'] = fa['nt']

    return fa

def _fetch_article(rurl, cookiejar):
rent = {}
try:
u = urllib.parse.urlparse(rurl)
if (u.path == '/' or u.path == '') and u.params == '':
print('url is for website main page and has no params, probably not a news article:', rurl)
return None

print('processing', rurl)
a = newspaper.Article(rurl)
a.download()
a.parse()
a.nlp()

rent['nt'] = a.title
try:
rent['nd'] = a.publish_date.strftime('%Y-%m-%d')
except:
pass
rent['nkw'] = a.keywords

except Exception as e:
print(str(e))

return rent


_query_headers = { _query_headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36' 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'

+ 14
- 2
templates/main.htm Прегледај датотеку

I made this website because I thought it was funny. It works by searching I made this website because I thought it was funny. It works by searching
for news articles with certain keywords on Google on each day. This only for news articles with certain keywords on Google on each day. This only
works well if the incidents are reported on on news sites/etc, but there are works well if the incidents are reported on on news sites/etc, but there are
false-positives (which you can report soon!). There's also a way to request
incident data directly the MassDOT, but that's a lot more work to pull data
false-positives (which you can report!). There's also a way to request
incident data directly from MassDOT, but that's a lot more work to pull data
from and I'm not sure how quickly it's updated so I'll leave that to someone from and I'm not sure how quickly it's updated so I'll leave that to someone
more dedicated to <a href="https://code.tr3y.io/treyzania/storrowed.boston">work on</a>. more dedicated to <a href="https://code.tr3y.io/treyzania/storrowed.boston">work on</a>.
</p> </p>
</div> </div>
{% endfor %} {% endfor %}
</div> </div>

<h1>Submit</h1>

<div id="submit">
Found an article that my shitty code missed? Submit it here!

<form action="/action/submit" method="POST">
<label for="submit_url">Evidence</label>
<input type="url" id="submit_url" name="article" required/>
<input type="submit" value="Submit"/>
</form>
</div>
</div> </div>


{% endblock %} {% endblock %}

Loading…
Откажи
Сачувај