MAX_SEARCH_DAYS = 180
MAX_SHOW_DAYS = 20
REPORT_HORIZON = 180
MAX_USER_REPORTS_PER_DAY = 3

################################
# Core configuration
    return tmplts.TemplateResponse('main.htm', p)

@app.post('/action/flag')
async def handle_flag(req: Request, date: str = Form(...), article: str = Form(...)):
    ipaddr = req.client.host
    try:
        today = datetime.now()
        pdate = datetime.strptime(date, inventory.DATE_FORMAT)
        if pdate > today or (today - pdate).days > REPORT_HORIZON:
            raise ValueError('bad date')
    except Exception as e:
        return JSONResponse({'status': 'error'}, status_code=400)
    flags = await inventory.load_date_flags_async(pdate)
    # Make sure it's not a duplicate and limit the number of reports
    nreporter = 0
    for e in flags:
        if e['src'] == ipaddr:
            if e['url'] == article:
                return {'status': 'OK'}
            nreporter += 1
    if nreporter >= MAX_USER_REPORTS_PER_DAY:
        print('user', ipaddr, 'looking sussy')
        await reporthook.send_report('address %s made more reports for %s than allowed' % (ipaddr, date))
        return JSONResponse({'status': 'error'}, status_code=429)
    await reporthook.send_report('address %s reported url %s' % (ipaddr, article))
    flags.append({
        'src': ipaddr,
        'url': article,
    })
    await inventory.save_date_flags_async(pdate, flags)
    return make_html_redirect_response('/')
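
# A hypothetical request against this endpoint, for illustration only (the host and
# port are assumptions, and the date placeholder must match inventory.DATE_FORMAT):
#
#   curl -X POST http://localhost:8000/action/flag \
#       -d 'date=<report date>' -d 'article=<article url>'
#
# A valid flag redirects back to '/', a bad date gets HTTP 400, and an address that
# files too many reports for one day gets HTTP 429.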

################################
# API endpoints
################################
    for i in range(MAX_SEARCH_DAYS):
        that_day = today - i * day_dur
        report = await inventory.load_date_report_async(that_day)
        flags = await inventory.load_date_flags_async(that_day)
        if len(report) > 0:
            reports[that_day.strftime(inventory.DATE_FORMAT)] = {
                'articles': report,
                'flags': flags,
            }
    return reports

def convert_days_from_articles(days):
    output = []
    for dstr, parts in days.items():
        dr = searchlib.process_day_results(dstr, parts['articles'])
        flags = {e['url'] for e in parts['flags']}
        day = {
            'date': dstr,
            'links': [],
            'maybe_links': []
        }
        # Process hard passes; a flagged article gets demoted to a weak match.
        for a in dr['pass']:
            ca = convert_article(a)
            if a['url'] not in flags:
                day['links'].append(ca)
            else:
                day['maybe_links'].append(ca)
        # Process weak articles; a flagged one is dropped entirely.
        for a in dr['maybe']:
            ca = convert_article(a)
            if a['url'] not in flags:
                day['maybe_links'].append(ca)
        if len(day['links']) > 0:
            output.append(day)
    lowest = ndays
    return lowest

def make_html_redirect_response(url):
    # Plain meta-refresh redirect, used so a POSTed form lands back on a normal page.
    return HTMLResponse('<head><meta http-equiv="Refresh" content="0; URL=' + url + '"></head>')
def get_datadir():
    return os.getenv('SB_DATADIR')

def get_article_path(slug):
    return os.path.join(get_datadir(), 'articles', slug + '.json')

def load_article_by_slug(slug):
    with open(get_article_path(slug), 'r') as f:
        return json.load(f)

def save_article(data):
    with open(get_article_path(data['slug']), 'w') as f:
        json.dump(data, f)

def get_date_report_path(date):
    dstr = date.strftime(DATE_FORMAT)
    return os.path.join(get_datadir(), 'days', dstr + '.json')
    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
    async with aiofiles.open(path, 'w') as f:
        await f.write(json.dumps(data))

def get_date_flags_path(date):
    dstr = date.strftime(DATE_FORMAT)
    return os.path.join(get_datadir(), 'flags', dstr + '.json')

async def load_date_flags_async(date):
    path = get_date_flags_path(date)
    if not os.path.exists(path):
        return []
    async with aiofiles.open(path, 'r') as f:
        return json.loads(await f.read())

async def save_date_flags_async(date, data):
    path = get_date_flags_path(date)
    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
    async with aiofiles.open(path, 'w') as f:
        await f.write(json.dumps(data))
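
# For reference, a flags file under 'flags/<date>.json' is just the list that
# handle_flag builds up, e.g. (illustrative values only):
#   [{"src": "203.0.113.7", "url": "https://example.com/some-storrowing-article"}]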
def process_results(rtbl):
    rw_domains = [(re.compile(reg), rw) for reg, rw in REWRITE_DOMAINS]
    excl_domains = [re.compile(regex) for regex in EXCL_DOMAINS]
    results = {}
    for qdate, rents in rtbl.items():
        results[qdate] = _process_day_results(rw_domains, excl_domains, qdate, rents)
    return results

def process_day_results(query_date, rents):
    rw_domains = [(re.compile(reg), rw) for reg, rw in REWRITE_DOMAINS]
    excl_domains = [re.compile(regex) for regex in EXCL_DOMAINS]
    return _process_day_results(rw_domains, excl_domains, query_date, rents)

def _process_day_results(rw_domains, excl_domains, qdate, rents):
    articles = {
        'pass': [],
        'maybe': [],
    }
    for rent in rents:
        uscheme, unl, upath, upar, uq, ufrag = urllib.parse.urlparse(rent['u'])
        # Check skip because of domain exclusion
        skip_cuz_url = False
        for reg in excl_domains:
            if reg.fullmatch(unl):
                skip_cuz_url = True
                break
        if skip_cuz_url:
            continue
        # Rewrite the domain, if applicable
        for reg, rw in rw_domains:
            if reg.fullmatch(unl):
                unl = rw
                break
        # Check skip because of URL path file extension
        skip_cuz_pathext = False
        for ext in EXCL_PATH_EXTS:
            if upath.endswith(ext):
                skip_cuz_pathext = True
                break
        if skip_cuz_pathext:
            continue
        has_kws = 'nkw' in rent
        # Check skip because of hard exclusion keywords
        skip_cuz_kws = False
        if has_kws:
            for kw in EXCL_KWS:
                for akw in rent['nkw']:
                    if kw in akw:
                        skip_cuz_kws = True
                if skip_cuz_kws:
                    break
        if skip_cuz_kws:
            continue
        # Now characterize what kind of entry it is.
        has_date = 'nd' in rent
        has_title = 'nt' in rent
        has_pass_kw = False
        if has_kws:
            for kw in PASS_KWS:
                for akw in rent['nkw']:
                    if kw in akw:
                        has_pass_kw = True
                if has_pass_kw:
                    break
        # Try to assemble a record to store the thing.
        eff_date = rent['nd'] if has_date else qdate
        eff_url = urllib.parse.urlunparse([uscheme, unl, upath, upar, uq, ''])
        item = {
            'slug': gen_slug(rent['t'], eff_date),
            'url': eff_url,
            'gtitle': rent['t'],
            'title': rent['nt'] if has_title else None,
            'date': eff_date,
            'kws': rent['nkw'] if has_kws else None
        }
        if has_pass_kw:
            articles['pass'].append(item)
        else:
            articles['maybe'].append(item)
    return articles
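
# Shape of the value returned above, for reference (field values illustrative):
#   {'pass':  [{'slug': ..., 'url': ..., 'gtitle': ..., 'title': ..., 'date': ..., 'kws': [...]}, ...],
#    'maybe': [...]}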

SLUG_CHUNKS = re.compile('[abcdefghijklmnopqrstuvwxyz1234567890]+')
{% extends 'base.htm' %}
{% macro render_article_link(desc, date) %}
<li>
    <a href="{{ desc.url }}">{{ desc.title }}</a>
    <form action="/action/flag" method="POST">
        <input type="hidden" name="date" value="{{ date }}"/>
        <input type="hidden" name="article" value="{{ desc.url }}"/>
        <input type="submit" value="Flag False Positive"/>
    </form>
</li>
{% endmacro %}
{% block content %}
false-positives (which you can report soon!). There's also a way to request
incident data directly from MassDOT, but that's a lot more work to pull data
from and I'm not sure how quickly it's updated, so I'll leave that to someone
more dedicated to <a href="https://code.tr3y.io/treyzania/storrowed.boston">work on</a>.
</p>
<h1>Articles</h1>
<h3>{{ d.date }}</h3>
<ul>
    {% for l in d.links %}
    {{ render_article_link(l, d.date) }}
    {% endfor %}
</ul>
{% if d.maybe_links|length > 0 %}
<summary><em>and {{ d.maybe_links|length }} weak match(es)</em></summary>
<ul>
    {% for l in d.maybe_links %}
    {{ render_article_link(l, d.date) }}
    {% endfor %}
</ul>
</details>