Browse Source

Added submission functionality.

master
Trey Del Bonis 2 years ago
parent
commit
76e02162fc
3 changed files with 100 additions and 34 deletions
  1. 24
    0
      app.py
  2. 62
    32
      searchlib.py
  3. 14
    2
      templates/main.htm

+ 24
- 0
app.py View File

@@ -116,6 +116,30 @@ async def handle_flag(req: Request, date: str = Form(...), article: str = Form(.

return make_html_redirect_response('/')

@app.post('/action/submit')
async def handle_submit(req: Request, article: str = Form(...)):
    """Handle a user-submitted article URL from the "Submit" form.

    The URL is fetched and parsed via ``searchlib.fetch_article`` and then run
    through ``searchlib.process_day_results``.  Only when that processing
    reports at least one entry under its ``'pass'`` key is the article stored
    and a report sent.  Every outcome redirects the client back to ``/``.

    Args:
        req: The incoming request; only the client address is read from it.
        article: The submitted URL (form field ``article``).

    Returns:
        An HTML redirect response to ``/``.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import asyncio

    ipaddr = req.client.host

    today_str = datetime.now().strftime(inventory.DATE_FORMAT)

    # fetch_article downloads and parses the page synchronously; run it in the
    # default executor so the event loop is not blocked while it works.
    loop = asyncio.get_running_loop()
    fetched_art = await loop.run_in_executor(None, searchlib.fetch_article, article)
    if fetched_art is None:
        return make_html_redirect_response('/')

    # Prefer the article's own publish date ('nd') and fall back to today.
    eff_date = fetched_art.get('nd', today_str)

    # Now process it so we can tell that it's a definite match.
    proced_art = searchlib.process_day_results(eff_date, [fetched_art])
    if not proced_art['pass']:
        return make_html_redirect_response('/')

    # If it all looks good then store it and report it.
    await add_article(eff_date, fetched_art)
    await reporthook.send_report('address %s submitted good-looking article %s' % (ipaddr, article))

    return make_html_redirect_response('/')

################################
# API endpoints
################################

+ 62
- 32
searchlib.py View File

@@ -157,47 +157,77 @@ def query_range(startdate, numdays, preloadurls=None):
for i in range(numdays):
d = startdate + (oneday * i)
print(d)
qresults = query_for_date(d, cookiejar)
dres = _query_day_and_fetch(d, cookiejar, seenurls)
dateurls[d.strftime('%Y-%m-%d')] = dres

dayresults = []
for rurl, rtitle in qresults:
if rurl in seenurls:
continue
seenurls.add(rurl)
return dateurls
def query_day(date, preloadurls=None):
    """Query and fetch the results for a single day.

    Args:
        date: The day to query; passed through to ``_query_day_and_fetch``.
        preloadurls: Optional iterable of URLs to treat as already seen.

    Returns:
        Whatever ``_query_day_and_fetch`` returns for that day.
    """
    jar = load_cookiejar()

    # Always hand the helper a fresh set so the caller's iterable is not mutated.
    if preloadurls is not None:
        already_seen = set(preloadurls)
    else:
        already_seen = set()

    return _query_day_and_fetch(date, jar, already_seen)

art = None
try:
u = urllib.parse.urlparse(rurl)
if (u.path == '/' or u.path == '') and u.params == '':
print('url is for website main page and has no params, probably not a news article:', rurl)
continue
def _query_day_and_fetch(date, cookiejar, seenurls_mut):
    """Query one day's search results and fetch details for each new URL.

    Args:
        date: The day to query; passed to ``query_for_date``.
        cookiejar: Cookie jar passed to ``query_for_date`` and
            ``_fetch_article``.
        seenurls_mut: Set of URLs already handled.  It is mutated in place:
            every URL processed here is added, and URLs already present are
            skipped.

    Returns:
        A list of dicts, one per newly seen URL, each with at least ``'u'``
        (URL) and ``'t'`` (search-result title), plus whatever fields
        ``_fetch_article`` returned for it.
    """
    # Use this function's own parameters: the names `d` and `seenurls` only
    # exist in the callers and are undefined here.
    qresults = query_for_date(date, cookiejar)

    dayresults = []
    for rurl, rtitle in qresults:
        if rurl in seenurls_mut:
            continue
        seenurls_mut.add(rurl)

        rent = {
            'u': rurl,
            't': rtitle,
        }

        # When _fetch_article returns None the entry is still kept, carrying
        # only the URL and search title.
        fa = _fetch_article(rurl, cookiejar)
        if fa is not None:
            rent.update(fa)

        dayresults.append(rent)

    return dayresults

return dateurls
def fetch_article(url):
    """Fetch one article by URL and shape it like a search-result entry.

    Args:
        url: The article URL to download and parse.

    Returns:
        A dict with the fields from ``_fetch_article`` plus ``'u'`` (the URL)
        and ``'t'`` (a copy of the parsed title ``'nt'``), or ``None`` when
        the URL was rejected or no title could be extracted.
    """
    cj = load_cookiejar()

    fa = _fetch_article(url, cj)
    # _fetch_article returns None for rejected URLs, and a dict without 'nt'
    # when downloading or parsing raised.  Treat both as "nothing fetched"
    # instead of failing with KeyError on the title lookup below.
    if fa is None or 'nt' not in fa:
        return None

    fa['u'] = url
    fa['t'] = fa['nt']

    return fa

def _fetch_article(rurl, cookiejar):
    """Download and parse an article, returning the extracted fields.

    Args:
        rurl: URL of the article.
        cookiejar: Accepted for signature parity with the callers; not used
            by this function.

    Returns:
        ``None`` if the URL points at a site's root page.  Otherwise a dict
        that may contain ``'nt'`` (parsed title), ``'nd'`` (publish date as
        ``YYYY-MM-DD``, only when it could be formatted) and ``'nkw'``
        (keywords).  If downloading or parsing raises, the error is printed
        and the dict is returned with whatever was filled in so far
        (typically empty).
    """
    rent = {}
    try:
        u = urllib.parse.urlparse(rurl)
        # NOTE(review): `params` is the ';params' URL component, not the query
        # string -- confirm that `u.query` was not what was intended here.
        if u.path in ('/', '') and u.params == '':
            print('url is for website main page and has no params, probably not a news article:', rurl)
            return None

        print('processing', rurl)
        a = newspaper.Article(rurl)
        a.download()
        a.parse()
        a.nlp()

        rent['nt'] = a.title
        try:
            rent['nd'] = a.publish_date.strftime('%Y-%m-%d')
        except Exception:
            # Best effort: leave 'nd' unset when no usable publish date was
            # extracted.  Narrowed from a bare `except:` so KeyboardInterrupt
            # and SystemExit still propagate.
            pass
        rent['nkw'] = a.keywords

    except Exception as e:
        print(str(e))

    return rent

_query_headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'

+ 14
- 2
templates/main.htm View File

@@ -43,8 +43,8 @@
I made this website because I thought it was funny. It works by searching
for news articles with certain keywords on Google on each day. This only
works well if the incidents are covered by news sites/etc, but there are
false-positives (which you can report soon!). There's also a way to request
incident data directly the MassDOT, but that's a lot more work to pull data
false-positives (which you can report!). There's also a way to request
incident data directly from MassDOT, but that's a lot more work to pull data
from and I'm not sure how quickly it's updated so I'll leave that to someone
more dedicated to <a href="https://code.tr3y.io/treyzania/storrowed.boston">work on</a>.
</p>
@@ -81,6 +81,18 @@
</div>
{% endfor %}
</div>

<h1>Submit</h1>

<div id="submit">
Found an article that my shitty code missed? Submit it here!

<form action="/action/submit" method="POST">
<label for="submit_url">Evidence</label>
<input type="url" id="submit_url" name="article" required/>
<input type="submit" value="Submit"/>
</form>
</div>
</div>

{% endblock %}

Loading…
Cancel
Save