NyaaV3/nyaa/backend.py
Nicolas F 8ee687cb89 backend: count IP uploads in the user ratelimit
Users could double their ratelimit by uploading some torrents as
anonymous submissions, then log into their account and post more.

We can stop this by making the filter_uploader helper function use
an sqlalchemy.or_ query to check for uploads from either that user
or that user's IP.
2019-08-22 11:55:11 +02:00

390 lines
15 KiB
Python

import json
import os
import re
from datetime import datetime, timedelta
from ipaddress import ip_address
import flask
from werkzeug import secure_filename
import sqlalchemy
from orderedset import OrderedSet
from nyaa import models, utils
from nyaa.extensions import db
# Module-level alias for flask.current_app; the functions below read their
# settings through app.config[...]
app = flask.current_app

# Blacklists for _validate_torrent_filenames
# TODO: consider moving to config.py?

# Characters that cause a rejection when found anywhere inside a path part
CHARACTER_BLACKLIST = [
    '\u202E',  # RIGHT-TO-LEFT OVERRIDE
]

# Names that cause a rejection; they are compared against the lowercased text
# of a path part up to (not including) its last '.'
FILENAME_BLACKLIST = [
    # Windows reserved filenames
    'con',
    'nul',
    'prn',
    'aux',
    'com0', 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt0', 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
]
# Characters that are not allowed in RSS/XML output: C0 controls other than
# tab, line feed and carriage return, the surrogate range, U+FFFE and U+FFFF
ILLEGAL_XML_CHARS_RE = re.compile(
    u'[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]')


def sanitize_string(string, replacement='\uFFFD'):
    ''' Returns a copy of `string` in which every character matched by
        ILLEGAL_XML_CHARS_RE has been substituted with `replacement`
        (U+FFFD REPLACEMENT CHARACTER unless specified otherwise) '''
    cleaned = re.sub(ILLEGAL_XML_CHARS_RE, replacement, string)
    return cleaned
class TorrentExtraValidationException(Exception):
    ''' Raised when a torrent or upload form fails the extra validation done
        after the regular form validation.

        `errors` maps form field names to lists of error messages and is
        exposed as the `errors` attribute; it defaults to an empty dict. '''

    def __init__(self, errors=None):
        # Use a fresh dict per instance: a mutable default argument would be
        # shared (and could be mutated) across every argument-less instance
        self.errors = {} if errors is None else errors
@utils.cached_function
def get_category_id_map():
    ''' Builds a dict from the categories stored in the database, keyed by
        category id string, with the list of category names as value, e.g.
        {'1_0': ['Anime'], '1_2': ['Anime', 'English-translated'], ...}

        A main category maps to [main name]; each of its sub categories maps
        to [main name, sub name]. '''
    names_by_id = {}
    for parent in models.MainCategory.query:
        parent_name = parent.name
        names_by_id[parent.id_as_string] = [parent_name]
        # Sub categories follow their main category, in iteration order
        names_by_id.update(
            (child.id_as_string, [parent_name, child.name])
            for child in parent.sub_categories)
    return names_by_id
def _replace_utf8_values(dict_or_list):
    ''' Will replace 'property' with 'property.utf-8' and remove latter if it exists.
        Thanks, bitcomet! :/

        Walks dicts and lists recursively and modifies them in place; any
        other value is left alone. Returns True if at least one key was
        renamed anywhere in the structure, False otherwise. '''
    suffix = '.utf-8'
    did_change = False

    if isinstance(dict_or_list, dict):
        # Collect the matching keys up front, as the dict is mutated below
        utf8_keys = [key for key in dict_or_list if key.endswith(suffix)]
        for key in utf8_keys:
            # Strip only the trailing suffix; str.replace would also eat any
            # earlier '.utf-8' occurrence inside the key
            dict_or_list[key[:-len(suffix)]] = dict_or_list.pop(key)
            did_change = True

        for value in dict_or_list.values():
            did_change = _replace_utf8_values(value) or did_change
    elif isinstance(dict_or_list, list):
        for item in dict_or_list:
            did_change = _replace_utf8_values(item) or did_change

    return did_change
def _recursive_dict_iterator(source):
    ''' Generator walking `source` depth-first: every (key, value) pair is
        yielded, and whenever a value is itself a dict its pairs are yielded
        right after it. '''
    # TODO Make a proper dict-filetree walker
    for name, node in source.items():
        yield name, node
        if isinstance(node, dict):
            yield from _recursive_dict_iterator(node)
def _validate_torrent_filenames(torrent):
    ''' Inspects every path part in the torrent's stored file tree and
        returns False as soon as one is a blacklisted filename or contains
        a blacklisted character; returns True if nothing was rejected. '''
    tree = json.loads(torrent.filelist.filelist_blob.decode('utf-8'))

    for name, _node in _recursive_dict_iterator(tree):
        # Compare the part before the last dot, case-insensitively
        stem = name.rsplit('.', 1)[0].lower()
        if stem in FILENAME_BLACKLIST:
            return False

        if any(bad_char in name for bad_char in CHARACTER_BLACKLIST):
            return False

    return True
def validate_torrent_post_upload(torrent, upload_form=None):
    ''' Runs the final checks on a Torrent instance before it is saved.
        Doing user-dependent validation here is more flexible than inside
        the WTForm context.

        Returns None when the torrent passes. Otherwise raises
        TorrentExtraValidationException carrying {'torrent_file': [messages]};
        if `upload_form` is given, the messages are first appended to the
        matching form field's errors. '''
    file_errors = []

    # Enforce minimum size for userless uploads
    size_floor = app.config['MINIMUM_ANONYMOUS_TORRENT_SIZE']
    if torrent.user is None and torrent.filesize < size_floor:
        file_errors.append('Torrent too small for an anonymous uploader')

    if not _validate_torrent_filenames(torrent):
        file_errors.append('Torrent has forbidden characters in filenames')

    # Nothing collected means the torrent is fine
    if not file_errors:
        return

    errors = {'torrent_file': file_errors}
    if upload_form:
        # Attach the messages to their form fields
        for field_name, field_errors in errors.items():
            getattr(upload_form, field_name).errors.extend(field_errors)

        # Reset the wtforms error dict so that it gets regenerated
        upload_form._errors = None

    raise TorrentExtraValidationException(errors)
def check_uploader_ratelimit(user):
    ''' Figures out if user (or IP address from flask.request) may
        upload within upload ratelimit.

        Torrents are matched by the requesting IP address and, when `user`
        is given, also by that user, so anonymous uploads made from the same
        IP count towards the user's limit.

        Returns a tuple of current datetime, count of torrents uploaded
        within burst duration and timestamp for next allowed upload. The
        last value equals the first one when no waiting is required. '''
    now = datetime.utcnow()
    next_allowed_time = now

    Torrent = models.Torrent
    # Pack the request IP once; every query below filters on it
    uploader_ip = ip_address(flask.request.remote_addr).packed

    def filter_uploader(query):
        ''' Restricts `query` to torrents from this user and/or this IP '''
        if user:
            return query.filter(sqlalchemy.or_(
                Torrent.user == user,
                Torrent.uploader_ip == uploader_ip))
        return query.filter(Torrent.uploader_ip == uploader_ip)

    # Derive the window from the same `now` that is returned to the caller
    time_range_start = now - timedelta(seconds=app.config['UPLOAD_BURST_DURATION'])

    # Count torrents uploaded by user/ip within given time period
    torrent_count_query = db.session.query(sqlalchemy.func.count(Torrent.id))
    torrent_count = filter_uploader(torrent_count_query).filter(
        Torrent.created_time >= time_range_start).scalar()

    # If user has reached burst limit...
    if torrent_count >= app.config['MAX_UPLOAD_BURST']:
        # Check how long ago their latest torrent was
        last_torrent = filter_uploader(Torrent.query).order_by(Torrent.created_time.desc()).first()

        # There may be no torrent at all (e.g. a burst limit of 0 and an
        # uploader without uploads); then there is nothing to wait for
        if last_torrent is not None:
            after_timeout = last_torrent.created_time + timedelta(
                seconds=app.config['UPLOAD_TIMEOUT'])

            if now < after_timeout:
                next_allowed_time = after_timeout

    return now, torrent_count, next_allowed_time
def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False):
    ''' Stores a torrent to the database.
        May throw TorrentExtraValidationException if the form/torrent fails
        post-WTForm validation! Exception messages will also be added to their
        relevant fields on the given form.

        upload_form: the submitted upload form; its torrent_file field
            provides the parsed torrent data and the uploaded file object.
        uploading_user: the user uploading the torrent, or None for an
            anonymous upload.
        fromAPI: not referenced inside this function.

        Besides the database rows, the bencoded info dict is written to
        torrent.info_dict_path and, if BACKUP_TORRENT_FOLDER is configured,
        the uploaded .torrent file is saved there as well.

        Returns the newly stored models.Torrent. '''
    torrent_data = upload_form.torrent_file.parsed_data

    # Anonymous uploaders and non-trusted uploaders
    no_or_new_account = (not uploading_user
                         or (uploading_user.age < app.config['RATELIMIT_ACCOUNT_AGE']
                             and not uploading_user.is_trusted))

    if app.config['RATELIMIT_UPLOADS'] and no_or_new_account:
        now, torrent_count, next_time = check_uploader_ratelimit(uploading_user)
        if next_time > now:
            # This will flag the dialog in upload.html red and tell API users what's wrong
            upload_form.ratelimit.errors = ["You've gone over the upload ratelimit."]
            raise TorrentExtraValidationException()

    # Extra restrictions that only apply to anonymous uploads
    if not uploading_user:
        if app.config['RAID_MODE_LIMIT_UPLOADS']:
            # XXX TODO: rename rangebanned to something more generic
            upload_form.rangebanned.errors = [app.config['RAID_MODE_UPLOADS_MESSAGE']]
            raise TorrentExtraValidationException()
        elif models.RangeBan.is_rangebanned(ip_address(flask.request.remote_addr).packed):
            upload_form.rangebanned.errors = ["Your IP is banned from "
                                              "uploading anonymously."]
            raise TorrentExtraValidationException()

    # Delete existing torrent which is marked as deleted
    if torrent_data.db_id is not None:
        old_torrent = models.Torrent.by_id(torrent_data.db_id)
        db.session.delete(old_torrent)
        db.session.commit()
        # Delete physical file after transaction has been committed
        _delete_info_dict(old_torrent)

    # The torrent has been validated and is safe to access with ['foo'] etc - all relevant
    # keys and values have been checked for (see UploadForm in forms.py for details)
    info_dict = torrent_data.torrent_dict['info']

    # Renames 'x.utf-8' keys to 'x' in place; True if anything was renamed
    changed_to_utf8 = _replace_utf8_values(torrent_data.torrent_dict)

    # Use uploader-given name or grab it from the torrent
    display_name = upload_form.display_name.data.strip() or info_dict['name'].decode('utf8').strip()
    information = (upload_form.information.data or '').strip()
    description = (upload_form.description.data or '').strip()

    # Sanitize fields
    display_name = sanitize_string(display_name)
    information = sanitize_string(information)
    description = sanitize_string(description)

    # Single-file torrents carry 'length'; otherwise sum the lengths in 'files'
    # NOTE(review): a falsy 'length' without a 'files' key would make sum()
    # iterate over None - presumably ruled out by the form validation; confirm
    torrent_filesize = info_dict.get('length') or sum(
        f['length'] for f in info_dict.get('files'))

    # In case no encoding, assume UTF-8.
    torrent_encoding = torrent_data.torrent_dict.get('encoding', b'utf-8').decode('utf-8')

    torrent = models.Torrent(id=torrent_data.db_id,
                             info_hash=torrent_data.info_hash,
                             display_name=display_name,
                             torrent_name=torrent_data.filename,
                             information=information,
                             description=description,
                             encoding=torrent_encoding,
                             filesize=torrent_filesize,
                             user=uploading_user,
                             uploader_ip=ip_address(flask.request.remote_addr).packed)

    # Store bencoded info_dict
    # NOTE(review): this file is written before validate_torrent_post_upload
    # runs further down; nothing in this function removes it again if that
    # validation raises - confirm whether callers clean up
    info_dict_path = torrent.info_dict_path

    info_dict_dir = os.path.dirname(info_dict_path)
    os.makedirs(info_dict_dir, exist_ok=True)

    with open(info_dict_path, 'wb') as out_file:
        out_file.write(torrent_data.bencoded_info_dict)

    torrent.stats = models.Statistic()
    torrent.has_torrent = True

    # Fields with default value will be None before first commit, so set .flags
    torrent.flags = 0

    # Uploads without a user are always anonymous
    torrent.anonymous = upload_form.is_anonymous.data if uploading_user else True
    torrent.hidden = upload_form.is_hidden.data
    torrent.remake = upload_form.is_remake.data
    torrent.complete = upload_form.is_complete.data
    # Copy trusted status from user if possible
    can_mark_trusted = uploading_user and uploading_user.is_trusted
    # To do, automatically mark trusted if user is trusted unless user specifies otherwise
    torrent.trusted = upload_form.is_trusted.data if can_mark_trusted else False

    # Only allow mods to upload locked torrents
    can_mark_locked = uploading_user and uploading_user.is_moderator
    torrent.comment_locked = upload_form.is_comment_locked.data if can_mark_locked else False

    # Set category ids
    torrent.main_category_id, torrent.sub_category_id = \
        upload_form.category.parsed_data.get_category_ids()

    # To simplify parsing the filelist, turn single-file torrent into a list
    torrent_filelist = info_dict.get('files')

    # Paths are decoded as UTF-8 if any '.utf-8' key was substituted above,
    # otherwise with the torrent's declared encoding (the and/or form behaves
    # like a conditional expression here because 'utf-8' is truthy)
    used_path_encoding = changed_to_utf8 and 'utf-8' or torrent_encoding

    parsed_file_tree = dict()
    if not torrent_filelist:
        # If single-file, the root will be the file-tree (no directory)
        file_tree_root = parsed_file_tree
        torrent_filelist = [{'length': torrent_filesize, 'path': [info_dict['name']]}]
    else:
        # If multi-file, use the directory name as root for files
        file_tree_root = parsed_file_tree.setdefault(
            info_dict['name'].decode(used_path_encoding), {})

    # Parse file dicts into a tree
    for file_dict in torrent_filelist:
        # Decode path parts from bytes using the encoding chosen above
        path_parts = [path_part.decode(used_path_encoding) for path_part in file_dict['path']]

        # The last part is the filename, everything before it is directories
        filename = path_parts.pop()
        current_directory = file_tree_root

        # Descend into (and create as needed) the nested directory dicts
        for directory in path_parts:
            current_directory = current_directory.setdefault(directory, {})

        # Don't add empty filenames (BitComet directory)
        if filename:
            current_directory[filename] = file_dict['length']

    parsed_file_tree = utils.sorted_pathdict(parsed_file_tree)

    # Compact JSON (no spaces after separators) stored as UTF-8 bytes
    json_bytes = json.dumps(parsed_file_tree, separators=(',', ':')).encode('utf8')
    torrent.filelist = models.TorrentFilelist(filelist_blob=json_bytes)

    db.session.add(torrent)
    # Flush before torrent.id is read for the tracker references below
    # (presumably this is what populates the id of a new row - confirm)
    db.session.flush()

    # Store the users trackers
    trackers = OrderedSet()
    announce = torrent_data.torrent_dict.get('announce', b'').decode('ascii')
    if announce:
        trackers.add(announce)

    # List of lists with single item
    # NOTE(review): only the first entry of each inner list is read
    announce_list = torrent_data.torrent_dict.get('announce-list', [])
    for announce in announce_list:
        trackers.add(announce[0].decode('ascii'))

    # Store webseeds
    # qBittorrent doesn't omit url-list but sets it as '' even when there are no webseeds
    webseed_list = torrent_data.torrent_dict.get('url-list') or []
    if isinstance(webseed_list, bytes):
        webseed_list = [webseed_list]  # qB doesn't contain a sole url in a list
    webseeds = OrderedSet(webseed.decode('utf-8') for webseed in webseed_list)

    # Remove our trackers, maybe? TODO ?

    # Search for/Add trackers in DB
    db_trackers = OrderedSet()
    for announce in trackers:
        tracker = models.Trackers.by_uri(announce)

        # Insert new tracker if not found
        if not tracker:
            tracker = models.Trackers(uri=announce)
            db.session.add(tracker)
            db.session.flush()
        elif tracker.is_webseed:
            # If we have an announce marked webseed (user error, malice?), reset it.
            # Better to have "bad" announces than "hiding" proper announces in webseeds/url-list.
            tracker.is_webseed = False
            db.session.flush()

        db_trackers.add(tracker)

    # Same for webseeds
    for webseed_url in webseeds:
        webseed = models.Trackers.by_uri(webseed_url)

        if not webseed:
            webseed = models.Trackers(uri=webseed_url, is_webseed=True)
            db.session.add(webseed)
            db.session.flush()

        # Don't add trackers into webseeds
        if webseed.is_webseed:
            db_trackers.add(webseed)

    # Store tracker refs in DB
    # `order` is the position within db_trackers: announces first, then webseeds
    for order, tracker in enumerate(db_trackers):
        torrent_tracker = models.TorrentTrackers(torrent_id=torrent.id,
                                                 tracker_id=tracker.id, order=order)
        db.session.add(torrent_tracker)

    # Before final commit, validate the torrent again
    # (raises TorrentExtraValidationException on failure)
    validate_torrent_post_upload(torrent, upload_form)

    # Add to tracker whitelist
    db.session.add(models.TrackerApi(torrent.info_hash, 'insert'))

    db.session.commit()

    # Store the actual torrent file as well
    torrent_file = upload_form.torrent_file.data
    if app.config.get('BACKUP_TORRENT_FOLDER'):
        # Rewind, so the file is saved from its beginning
        torrent_file.seek(0, 0)

        torrent_dir = app.config['BACKUP_TORRENT_FOLDER']
        os.makedirs(torrent_dir, exist_ok=True)

        # Saved as '<torrent id>.<sanitized uploaded filename>'
        torrent_path = os.path.join(torrent_dir, '{}.{}'.format(
            torrent.id, secure_filename(torrent_file.filename)))
        torrent_file.save(torrent_path)
    torrent_file.close()

    return torrent
def _delete_info_dict(torrent):
    ''' Removes the file at torrent.info_dict_path from disk.
        A file that does not exist is silently ignored; any other OS error
        propagates to the caller. '''
    try:
        # Remove directly instead of checking for existence first, so that a
        # file vanishing between check and removal cannot raise
        os.remove(torrent.info_dict_path)
    except (FileNotFoundError, NotADirectoryError):
        # Nothing stored at that path (anymore); nothing to clean up
        pass