|
|
@@ -157,47 +157,77 @@ def query_range(startdate, numdays, preloadurls=None): |
|
|
|
for i in range(numdays): |
|
|
|
d = startdate + (oneday * i) |
|
|
|
print(d) |
|
|
|
qresults = query_for_date(d, cookiejar) |
|
|
|
dres = _query_day_and_fetch(d, cookiejar, seenurls) |
|
|
|
dateurls[d.strftime('%Y-%m-%d')] = dres |
|
|
|
|
|
|
|
dayresults = [] |
|
|
|
for rurl, rtitle in qresults: |
|
|
|
if rurl in seenurls: |
|
|
|
continue |
|
|
|
seenurls.add(rurl) |
|
|
|
return dateurls |
|
|
|
|
|
|
|
def query_day(date, preloadurls=None):
    """Query search results for a single day and fetch article metadata.

    Parameters:
        date: the day to query (passed through to _query_day_and_fetch /
            query_for_date).
        preloadurls: optional iterable of URLs already seen; results whose
            URL appears in it are skipped.

    Returns:
        List of result dicts ('u' url, 't' result title, plus whatever
        newspaper-derived fields _fetch_article extracted).
    """
    # NOTE(review): the lines that previously followed `return res` were
    # unreachable merge/diff residue (they referenced undefined names such
    # as `rurl` and contained a bare `continue` outside any loop); they
    # have been removed.
    cookiejar = load_cookiejar()

    seenurls = set() if preloadurls is None else set(preloadurls)

    res = _query_day_and_fetch(date, cookiejar, seenurls)
    return res
|
|
|
def _query_day_and_fetch(date, cookiejar, seenurls_mut):
    """Run the search query for `date` and fetch metadata for each new URL.

    Parameters:
        date: the day to query (forwarded to query_for_date).
        cookiejar: cookie jar used for the query and article fetches.
        seenurls_mut: mutable set of already-seen URLs; updated in place so
            callers iterating several days deduplicate across days.

    Returns:
        List of dicts, one per previously-unseen result URL, each with at
        least 'u' (url) and 't' (result title), merged with whatever
        _fetch_article could extract.
    """
    # Fixed from the merge-mangled version: use the `date` parameter (was
    # undefined `d`), test membership against `seenurls_mut` (was undefined
    # `seenurls`), append each result exactly once (the residue appended
    # twice), and delegate all article fetching to _fetch_article instead
    # of the duplicated inline newspaper code.
    qresults = query_for_date(date, cookiejar)

    dayresults = []
    for rurl, rtitle in qresults:
        if rurl in seenurls_mut:
            continue
        seenurls_mut.add(rurl)

        rent = {
            'u': rurl,
            't': rtitle,
        }

        fa = _fetch_article(rurl, cookiejar)
        if fa is not None:
            rent.update(fa)

        dayresults.append(rent)

    return dayresults
|
|
|
def fetch_article(url):
    """Fetch a single article URL and return its metadata dict, or None.

    Returns None when the URL looks like a site front page, or when the
    article could not be downloaded/parsed.  On success the dict contains
    'u' (url), 't' (title, copied from the extracted 'nt'), and the other
    fields _fetch_article produced ('nd', 'nkw' when available).
    """
    cj = load_cookiejar()

    fa = _fetch_article(url, cj)
    if fa is None:
        return None

    # _fetch_article swallows download/parse errors and returns a partial
    # (possibly empty) dict; without 'nt' the line below used to raise
    # KeyError.  Treat a missing title as "no article".
    if 'nt' not in fa:
        return None

    fa['u'] = url
    fa['t'] = fa['nt']

    return fa
|
|
|
|
|
|
|
def _fetch_article(rurl, cookiejar): |
|
|
|
rent = {} |
|
|
|
try: |
|
|
|
u = urllib.parse.urlparse(rurl) |
|
|
|
if (u.path == '/' or u.path == '') and u.params == '': |
|
|
|
print('url is for website main page and has no params, probably not a news article:', rurl) |
|
|
|
return None |
|
|
|
|
|
|
|
print('processing', rurl) |
|
|
|
a = newspaper.Article(rurl) |
|
|
|
a.download() |
|
|
|
a.parse() |
|
|
|
a.nlp() |
|
|
|
|
|
|
|
rent['nt'] = a.title |
|
|
|
try: |
|
|
|
rent['nd'] = a.publish_date.strftime('%Y-%m-%d') |
|
|
|
except: |
|
|
|
pass |
|
|
|
rent['nkw'] = a.keywords |
|
|
|
|
|
|
|
except Exception as e: |
|
|
|
print(str(e)) |
|
|
|
|
|
|
|
return rent |
|
|
|
|
|
|
|
_query_headers = { |
|
|
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36' |