diff --git a/.docker/Dockerfile b/.docker/Dockerfile new file mode 100644 index 0000000..1f503fa --- /dev/null +++ b/.docker/Dockerfile @@ -0,0 +1,17 @@ +FROM ubuntu:18.04 + +ENV LANG=en_US.utf-8 LC_ALL=en_US.utf-8 DEBIAN_FRONTEND=noninteractive +RUN apt-get -y update + +COPY ./ /nyaa/ +RUN cat /nyaa/config.example.py /nyaa/.docker/nyaa-config-partial.py > /nyaa/config.py + +# Requirements for running the Flask app +RUN apt-get -y install build-essential git python3 python3-pip libmysqlclient-dev curl +# Helpful stuff for the docker entrypoint.sh script +RUN apt-get -y install mariadb-client netcat + +WORKDIR /nyaa +RUN pip3 install -r requirements.txt + +CMD ["/nyaa/.docker/entrypoint.sh"] diff --git a/.docker/README.md b/.docker/README.md new file mode 100644 index 0000000..b710dfa --- /dev/null +++ b/.docker/README.md @@ -0,0 +1,48 @@ +# Nyaa on Docker +> [!CAUTION] +> Docker deployment is out of date and currently unsupported in NyaaV3. + +Docker infrastructure is provided to ease setting up a dev environment + +## Quickstart + +Get started by running (from the root of the project): + + docker-compose -f .docker/full-stack.yml -p nyaa build nyaa-flask + docker-compose -f .docker/full-stack.yml -p nyaa up -d + +This builds the Flask app container, then starts up the project. You can then go +to [localhost:8080](http://localhost:8080/) (note that some of the +services are somewhat slow to start so it may not be available for 30s or so). + +You can shut it down with: + + docker-compose -f .docker/full-stack.yml -p nyaa down + +## Details + +The environment includes: + - [nginx frontend](http://localhost:8080/) (on port 8080) + - uwsgi running the flask app + - the ES<>MariaDB sync process + - MariaDB + - ElasticSearch + - [Kibana](http://localhost:8080/kibana/) (at /kibana/) + +MariaDB, ElasticSearch, the sync process, and uploaded torrents will +persistently store their data in volumes which makes future start ups faster. 
+ +To make it more useful to develop with, you can copy `.docker/full-stack.yml` and +edit the copy and uncomment the `- "${NYAA_SRC_DIR}:/nyaa"` line, then +`export NYAA_SRC_DIR=$(pwd)` and start up the environment using the new compose +file: + + cp -a .docker/full-stack.yml .docker/local-dev.yml + cat config.example.py .docker/nyaa-config-partial.py > ./config.py + $EDITOR .docker/local-dev.yml + export NYAA_SRC_DIR=$(pwd) + docker-compose -f .docker/local-dev.yml -p nyaa up -d + +This will mount the local copy of the project files into the Flask container, +which combined with live-reloading in uWSGI should let you make changes and see +them take effect immediately (technically with a ~2 second delay). diff --git a/.docker/entrypoint-sync.sh b/.docker/entrypoint-sync.sh new file mode 100755 index 0000000..b58b646 --- /dev/null +++ b/.docker/entrypoint-sync.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# set +x + +pushd /nyaa + +echo 'Waiting for MySQL to start up' +while ! echo HELO | nc mariadb 3306 &>/dev/null; do + sleep 1 +done +echo 'DONE' + +echo 'Waiting for ES to start up' +while ! echo HELO | nc elasticsearch 9200 &>/dev/null; do + sleep 1 +done +echo 'DONE' + +echo 'Waiting for ES to be ready' +while ! curl -s -XGET 'elasticsearch:9200/_cluster/health?pretty=true&wait_for_status=green' &>/dev/null; do + sleep 1 +done +echo 'DONE' + +echo 'Waiting for sync data file to exist' +while ! [ -f /elasticsearch-sync/pos.json ]; do + sleep 1 +done +echo 'DONE' + +echo 'Starting the sync process' +/usr/bin/python3 /nyaa/sync_es.py /nyaa/.docker/es_sync_config.json diff --git a/.docker/entrypoint.sh b/.docker/entrypoint.sh new file mode 100755 index 0000000..7688739 --- /dev/null +++ b/.docker/entrypoint.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# set +x + +pushd /nyaa + +echo 'Waiting for MySQL to start up' +while ! echo HELO | nc mariadb 3306 &>/dev/null; do + sleep 1 +done +echo 'DONE' + +if ! 
[ -f /elasticsearch-sync/flag-db_create ]; then + python3 ./db_create.py + touch /elasticsearch-sync/flag-db_create +fi + +if ! [ -f /elasticsearch-sync/flag-db_migrate ]; then + python3 ./db_migrate.py stamp head + touch /elasticsearch-sync/flag-db_migrate +fi + +echo 'Waiting for ES to start up' +while ! echo HELO | nc elasticsearch 9200 &>/dev/null; do + sleep 1 +done +echo 'DONE' + +echo 'Waiting for ES to be ready' +while ! curl -s -XGET 'elasticsearch:9200/_cluster/health?pretty=true&wait_for_status=green' &>/dev/null; do + sleep 1 +done +echo 'DONE' + +if ! [ -f /elasticsearch-sync/flag-create_es ]; then + # @source create_es.sh + # create indices named "nyaa" and "sukebei", these are hardcoded + curl -v -XPUT 'elasticsearch:9200/nyaa?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml + curl -v -XPUT 'elasticsearch:9200/sukebei?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml + touch /elasticsearch-sync/flag-create_es +fi + +if ! [ -f /elasticsearch-sync/flag-import_to_es ]; then + python3 ./import_to_es.py | tee /elasticsearch-sync/import.out + grep -A1 'Save the following' /elasticsearch-sync/import.out | tail -1 > /elasticsearch-sync/pos.json + touch /elasticsearch-sync/flag-import_to_es +fi + +echo 'Starting the Flask app' +/usr/local/bin/uwsgi /nyaa/.docker/uwsgi.config.ini diff --git a/.docker/es_sync_config.json b/.docker/es_sync_config.json new file mode 100644 index 0000000..a6c7f9e --- /dev/null +++ b/.docker/es_sync_config.json @@ -0,0 +1,11 @@ +{ + "save_loc": "/elasticsearch-sync/pos.json", + "mysql_host": "mariadb", + "mysql_port": 3306, + "mysql_user": "nyaadev", + "mysql_password": "ZmtB2oihHFvc39JaEDoF", + "database": "nyaav3", + "internal_queue_depth": 10000, + "es_chunk_size": 10000, + "flush_interval": 5 +} diff --git a/.docker/full-stack.yml b/.docker/full-stack.yml new file mode 100644 index 0000000..f2fb11b --- /dev/null +++ b/.docker/full-stack.yml @@ -0,0 +1,71 @@ +--- + +version: "3" 
+services: + nginx: + image: nginx:1.15-alpine + ports: + - '8080:80' + volumes: + - './nginx.conf:/etc/nginx/nginx.conf:ro' + - '../nyaa/static:/nyaa-static:ro' + depends_on: + - nyaa-flask + - kibana + + nyaa-flask: + image: local/nyaa:devel + volumes: + - 'nyaa-torrents:/nyaa-torrents' + - 'nyaa-sync-data:/elasticsearch-sync' + ## Uncomment this line to mount the local dir into the running + ## instance for live changes (after setting NYAA_SRC_DIR env var) + # - "${NYAA_SRC_DIR}:/nyaa" + depends_on: + - mariadb + - elasticsearch + build: + context: ../ + dockerfile: ./.docker/Dockerfile + + nyaa-sync: + image: local/nyaa:devel + volumes: + - 'nyaa-sync-data:/elasticsearch-sync' + command: /nyaa/.docker/entrypoint-sync.sh + depends_on: + - mariadb + - elasticsearch + restart: on-failure + + mariadb: + image: mariadb:10.0 + volumes: + - './mariadb-init-sql:/docker-entrypoint-initdb.d:ro' + - '../configs/my.cnf:/etc/mysql/conf.d/50-binlog.cnf:ro' + - 'mariadb-data:/var/lib/mysql' + environment: + - MYSQL_RANDOM_ROOT_PASSWORD=yes + - MYSQL_USER=nyaadev + - MYSQL_PASSWORD=ZmtB2oihHFvc39JaEDoF + - MYSQL_DATABASE=nyaav3 + + elasticsearch: + image: elasticsearch:6.5.4 + volumes: + - elasticsearch-data:/usr/share/elasticsearch/data + depends_on: + - mariadb + + kibana: + image: kibana:6.5.4 + volumes: + - './kibana.config.yml:/usr/share/kibana/config/kibana.yml:ro' + depends_on: + - elasticsearch + +volumes: + nyaa-torrents: + nyaa-sync-data: + mariadb-data: + elasticsearch-data: diff --git a/.docker/kibana.config.yml b/.docker/kibana.config.yml new file mode 100644 index 0000000..da9dc09 --- /dev/null +++ b/.docker/kibana.config.yml @@ -0,0 +1,9 @@ +--- + +server.name: kibana +server.host: 'kibana' +server.basePath: /kibana +# server.rewriteBasePath: true +# server.defaultRoute: /kibana/app/kibana +elasticsearch.url: http://elasticsearch:9200 +xpack.monitoring.ui.container.elasticsearch.enabled: true diff --git a/.docker/mariadb-init-sql/.gitignore 
b/.docker/mariadb-init-sql/.gitignore new file mode 100644 index 0000000..aecee65 --- /dev/null +++ b/.docker/mariadb-init-sql/.gitignore @@ -0,0 +1 @@ +!*.sql diff --git a/.docker/mariadb-init-sql/50-grant-binlog-access.sql b/.docker/mariadb-init-sql/50-grant-binlog-access.sql new file mode 100644 index 0000000..81fac03 --- /dev/null +++ b/.docker/mariadb-init-sql/50-grant-binlog-access.sql @@ -0,0 +1,3 @@ +GRANT REPLICATION SLAVE ON *.* TO 'nyaadev'@'%'; +GRANT REPLICATION CLIENT ON *.* TO 'nyaadev'@'%'; +FLUSH PRIVILEGES; diff --git a/.docker/nginx.conf b/.docker/nginx.conf new file mode 100644 index 0000000..27f3757 --- /dev/null +++ b/.docker/nginx.conf @@ -0,0 +1,59 @@ + +user nginx; +worker_processes 1; + +error_log /var/log/nginx/error.log warn; +pid /var/run/nginx.pid; + + +events { + worker_connections 1024; +} + + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + charset utf-8; + + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + access_log /var/log/nginx/access.log main; + + sendfile on; + #tcp_nopush on; + + keepalive_timeout 65; + + gzip on; + + server { + listen 80; + server_name localhost default; + + location /static { + alias /nyaa-static; + } + + # fix kibana redirecting to localhost/kibana (without the port) + rewrite ^/kibana$ http://$http_host/kibana/ permanent; + location /kibana/ { + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_cache_bypass $http_upgrade; + + proxy_set_header Host 'kibana'; + proxy_set_header X-Real-IP $remote_addr; + + proxy_pass http://kibana:5601/; + } + + location / { + include /etc/nginx/uwsgi_params; + uwsgi_pass nyaa-flask:5000; + } + } +} diff --git a/.docker/nyaa-config-partial.py b/.docker/nyaa-config-partial.py new file mode 100644 index 0000000..65a7616 --- /dev/null +++ 
b/.docker/nyaa-config-partial.py @@ -0,0 +1,10 @@ +# This is only a partial config file that will be appended to the end of +# config.example.py to build the full config for the docker environment + +SITE_NAME = 'Nyaa [DEVEL]' +GLOBAL_SITE_NAME = 'nyaa.devel' +SQLALCHEMY_DATABASE_URI = ('mysql://nyaadev:ZmtB2oihHFvc39JaEDoF@mariadb/nyaav3?charset=utf8mb4') +# MAIN_ANNOUNCE_URL = 'http://chihaya:6881/announce' +# TRACKER_API_URL = 'http://chihaya:6881/api' +BACKUP_TORRENT_FOLDER = '/nyaa-torrents' +ES_HOSTS = ['elasticsearch:9200'] diff --git a/.docker/uwsgi.config.ini b/.docker/uwsgi.config.ini new file mode 100644 index 0000000..7e8d620 --- /dev/null +++ b/.docker/uwsgi.config.ini @@ -0,0 +1,34 @@ +[uwsgi] +# socket = [addr:port] +socket = 0.0.0.0:5000 +#chmod-socket = 664 + +die-on-term = true + +# logging +#disable-logging = True +#logger = file:uwsgi.log + +# Base application directory +chdir = /nyaa + +# WSGI module and callable +# module = [wsgi_module_name]:[application_callable_name] +module = WSGI:app + +# master = [master process (true of false)] +master = true + +# debugging +catch-exceptions = true + +# performance +processes = 4 +buffer-size = 8192 + +loop = gevent +socket-timeout = 10 +gevent = 1000 +gevent-monkey-patch = true + +py-autoreload = 2 diff --git a/.github/issue_template.md b/.github/issue_template.md index dfe42b3..6419727 100644 --- a/.github/issue_template.md +++ b/.github/issue_template.md @@ -1,5 +1,3 @@ -Describe your issue/feature request here (you can remove all this text). Describe well and include images, if relevant! +Describe your issue/feature request here (you can remove all this text). Describe well and include images if relevant. -Please make sure to skim through the existing issues, your issue/request/etc may have already been noted! - -IMPORTANT: only submit issues that are relevant to the code. We do not offer support for any deployments of the project here; make your way to the IRC channel in such cases. 
+Please make sure to skim through the existing issues, as your issue/request/etc. may have already been noted! diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..72b55e1 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,100 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL Advanced" + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + schedule: + - cron: '37 6 * * 4' + +jobs: + analyze: + name: Analyze (${{ matrix.language }}) + # Runner size impacts CodeQL analysis time. To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners (GitHub.com only) + # Consider using larger runners or machines with greater resources for possible analysis time improvements. 
+ runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + permissions: + # required for all workflows + security-events: write + + # required to fetch internal or private CodeQL packs + packages: read + + # only required for workflows in private repositories + actions: read + contents: read + + strategy: + fail-fast: false + matrix: + include: + - language: javascript-typescript + build-mode: none + - language: python + build-mode: none + # CodeQL supports the following values keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' + # Use `c-cpp` to analyze code written in C, C++ or both + # Use 'java-kotlin' to analyze code written in Java, Kotlin or both + # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both + # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, + # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. + # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how + # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Add any setup steps before running the `github/codeql-action/init` action. + # This includes steps like installing compilers or runtimes (`actions/setup-node` + # or others). This is typically only required for manual builds. + # - name: Setup runtime (example) + # uses: actions/setup-example@v1 + + # Initializes the CodeQL tools for scanning. 
+ - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + build-mode: ${{ matrix.build-mode }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + # If the analyze step fails for one of the languages you are analyzing with + # "We were unable to automatically build your code", modify the matrix above + # to set the build mode to "manual" for that language. Then modify this step + # to build your code. + # â„šī¸ Command-line programs to run using the OS shell. + # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + - if: matrix.build-mode == 'manual' + shell: bash + run: | + echo 'If you are using a "manual" build mode for one or more of the' \ + 'languages you are analyzing, replace this with the commands to build' \ + 'your code, for example:' + echo ' make bootstrap' + echo ' make release' + exit 1 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" diff --git a/.gitignore b/.gitignore index 9f5059a..4fe76c2 100644 --- a/.gitignore +++ b/.gitignore @@ -14,16 +14,20 @@ __pycache__ # Databases *.sql -test.db +/test.db # Webserver -uwsgi.sock +/uwsgi.sock # Application -install/* -config.py +/install/* +/config.py +/es_sync_config.json /test_torrent_batch -torrents + +# Build Output +nyaa/static/js/bootstrap-select.min.js +nyaa/static/js/main.min.js # Other *.swp diff --git a/.travis.yml b/.travis.yml 
index 89b21dd..8c6f6c2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,8 @@ language: python -python: "3.6" +python: "3.13" -dist: trusty -sudo: required +dist: jammy matrix: fast_finish: true @@ -14,7 +13,7 @@ services: mysql before_install: - - mysql -u root -e 'CREATE DATABASE nyaav2 DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;' + - mysql -u root -e 'CREATE DATABASE nyaav3 DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;' install: - pip install -r requirements.txt diff --git a/README.md b/README.md index 93a4440..854bc11 100644 --- a/README.md +++ b/README.md @@ -1,47 +1,54 @@ -# NyaaV2 [![Build Status](https://travis-ci.org/nyaadevs/nyaa.svg?branch=master)](https://travis-ci.org/nyaadevs/nyaa) +# NyaaV3 [![python](https://img.shields.io/badge/Python-3.13-3776AB.svg?style=flat&logo=python&logoColor=white)](https://www.python.org) ![Maintenance](https://img.shields.io/maintenance/yes/2025) ## Setting up for development -This project uses Python 3.6. There are features used that do not exist in 3.5, so make sure to use Python 3.6. -This guide also assumes you 1) are using Linux and 2) are somewhat capable with the commandline. -It's not impossible to run Nyaa on Windows, but this guide doesn't focus on that. +This project uses Python 3.13. The codebase has been updated from the original Python 3.7 version to use modern Python features and updated dependencies. +This guide assumes you are using Linux and are somewhat capable with the commandline. +Running Nyaa on Windows may be possible, but it's currently unsupported. -### Code Quality: -- Before we get any deeper, remember to follow PEP8 style guidelines and run `./dev.py lint` before committing to see a list of warnings/problems. - - You may also use `./dev.py fix && ./dev.py isort` to automatically fix some of the issues reported by the previous command. 
+### Major changes from NyaaV2 +- Updated from Python 3.7 to Python 3.13 +- Updated all dependencies to their latest versions +- Modernized code patterns for Flask 3.0 and SQLAlchemy 2.0 +- Replaced deprecated Flask-Script, orderedset and `flask.Markup` with Flask CLI, orderly-set and markupsafe +- Implemented mail error handling + +### Code Quality +- Before we get any deeper, remember to follow PEP8 style guidelines and run `python dev.py lint` before committing to see a list of warnings/problems. + - You may also use `python dev.py fix && python dev.py isort` to automatically fix some of the issues reported by the previous command. - Other than PEP8, try to keep your code clean and easy to understand, as well. It's only polite! -### Running Tests +### Running tests The `tests` folder contains tests for the the `nyaa` module and the webserver. To run the tests: -- Make sure that you are in the python virtual environment. -- Run `./dev.py test` while in the repository directory. +- Make sure that you are in the Python virtual environment. +- Run `python dev.py test` while in the repository directory. ### Setting up Pyenv -pyenv eases the use of different Python versions, and as not all Linux distros offer 3.6 packages, it's right up our alley. -- Install dependencies https://github.com/pyenv/pyenv/wiki/Common-build-problems -- Install `pyenv` https://github.com/pyenv/pyenv/blob/master/README.md#installation -- Install `pyenv-virtualenv` https://github.com/pyenv/pyenv-virtualenv/blob/master/README.md -- Install Python 3.6.1 with `pyenv` and create a virtualenv for the project: - - `pyenv install 3.6.1` - - `pyenv virtualenv 3.6.1 nyaa` +pyenv eases the use of different Python versions, and as not all Linux distros offer 3.13 packages, it's right up our alley. 
+- Install [dependencies](https://github.com/pyenv/pyenv/wiki/Common-build-problems) +- Install [pyenv](https://github.com/pyenv/pyenv/blob/master/README.md#installation) +- Install [pyenv-virtualenv](https://github.com/pyenv/pyenv-virtualenv/blob/master/README.md) +- Install Python 3.13 with `pyenv` and create a virtualenv for the project: + - `pyenv install 3.13.2` + - `pyenv virtualenv 3.13.2 nyaa` - `pyenv activate nyaa` - Install dependencies with `pip install -r requirements.txt` - Copy `config.example.py` into `config.py` - Change `SITE_FLAVOR` in your `config.py` depending on which instance you want to host ### Setting up MySQL/MariaDB database -You *may* use SQLite but the current support for it in this project is outdated and rather unsupported. +> [!WARNING] +> You *may* use SQLite but it is currently untested and unsupported. - Enable `USE_MYSQL` flag in config.py -- Install latest mariadb by following instructions here https://downloads.mariadb.org/mariadb/repositories/ - - Tested versions: `mysql Ver 15.1 Distrib 10.0.30-MariaDB, for debian-linux-gnu (x86_64) using readline 5.2` +- Install MariaDB by following instructions [here](https://downloads.mariadb.org/mariadb/repositories/) - Run the following commands logged in as your root db user (substitute for your own `config.py` values if desired): - - `CREATE USER 'test'@'localhost' IDENTIFIED BY 'test123';` - - `GRANT ALL PRIVILEGES ON *.* TO 'test'@'localhost';` + - `CREATE USER 'nyaauser'@'localhost' IDENTIFIED BY 'nyaapass';` + - `CREATE DATABASE nyaav3 DEFAULT CHARACTER SET utf8 COLLATE utf8_bin;` + - `GRANT ALL PRIVILEGES ON nyaav3.* TO 'nyaauser'@'localhost';` - `FLUSH PRIVILEGES;` - - `CREATE DATABASE nyaav2 DEFAULT CHARACTER SET utf8 COLLATE utf8_bin;` ### Finishing up - Run `python db_create.py` to create the database and import categories - - Follow the advice of `db_create.py` and run `./db_migrate.py stamp head` to mark the database version for Alembic + - ~~Follow the advice of 
`db_create.py` and run `python db_migrate.py stamp head` to mark the database version for Alembic~~ - Start the dev server with `python run.py` - When you are finished developing, deactivate your virtualenv with `pyenv deactivate` or `source deactivate` (or just close your shell session) @@ -50,44 +57,35 @@ Continue below to learn about database migrations and enabling the advanced sear ## Database migrations -- Database migrations are done with [flask-Migrate](https://flask-migrate.readthedocs.io/), a wrapper around [Alembic](http://alembic.zzzcomputing.com/en/latest/). +> [!WARNING] +> The database migration feature has been updated but will no longer be supported in NyaaV3. +- Database migrations are done with [Flask-Migrate](https://flask-migrate.readthedocs.io/), a wrapper around [Alembic](http://alembic.zzzcomputing.com/en/latest/). +- The migration system has been updated to use Flask CLI instead of the deprecated Flask-Script. - If someone has made changes in the database schema and included a new migration script: - - If your database has never been marked by Alembic (you're on a database from before the migrations), run `./db_migrate.py stamp head` before pulling the new migration script(s). - - If you already have the new scripts, check the output of `./db_migrate.py history` instead and choose a hash that matches your current database state, then run `./db_migrate.py stamp `. + - If your database has never been marked by Alembic (you're on a database from before the migrations), run `python db_migrate.py db stamp head` before pulling the new migration script(s). + - If you already have the new scripts, check the output of `python db_migrate.py db history` instead and choose a hash that matches your current database state, then run `python db_migrate.py db stamp `. - Update your branch (eg. `git fetch && git rebase origin/master`) - - Run `./db_migrate.py upgrade head` to run the migration. Done! 
+ - Run `python db_migrate.py db upgrade head` to run the migration. Done! - If *you* have made a change in the database schema: - Save your changes in `models.py` and ensure the database schema matches the previous version (ie. your new tables/columns are not added to the live database) - - Run `./db_migrate.py migrate -m "Short description of changes"` to automatically generate a migration script for the changes + - Run `python db_migrate.py db migrate -m "Short description of changes"` to automatically generate a migration script for the changes - Check the script (`migrations/versions/...`) and make sure it works! Alembic may not able to notice all changes. - - Run `./db_migrate.py upgrade` to run the migration and verify the upgrade works. - - (Run `./db_migrate.py downgrade` to verify the downgrade works as well, then upgrade again) + - Run `python db_migrate.py db upgrade` to run the migration and verify the upgrade works. + - (Run `python db_migrate.py db downgrade` to verify the downgrade works as well, then upgrade again) ## Setting up and enabling Elasticsearch ### Installing Elasticsearch - Install JDK with `sudo apt-get install openjdk-8-jdk` -- Install [Elasticsearch](https://www.elastic.co/downloads/elasticsearch) - - [From packages...](https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html) +- Install Elasticsearch + - [From packages](https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html) - Enable the service: - `sudo systemctl enable elasticsearch.service` - `sudo systemctl start elasticsearch.service` - - or [simply extracting the archives and running the files](https://www.elastic.co/guide/en/elasticsearch/reference/current/_installation.html), if you don't feel like permantently installing ES + - or [simply extracting the archives and running the files](https://www.elastic.co/guide/en/elasticsearch/reference/current/_installation.html), if you don't feel like permanently installing ES - Run `curl -XGET 
'localhost:9200'` and make sure ES is running - - Optional: install [Kibana](https://www.elastic.co/products/kibana) as a search debug frontend for ES - -### Setting up ES -- Run `./create_es.sh` to create the indices for the torrents: `nyaa` and `sukebei` - - The output should show `acknowledged: true` twice -- Stop the Nyaa app if you haven't already -- Run `python import_to_es.py` to import all the torrents (on nyaa and sukebei) into the ES indices. - - This may take some time to run if you have plenty of torrents in your database. - -Enable the `USE_ELASTIC_SEARCH` flag in `config.py` and (re)start the application. -Elasticsearch should now be functional! The ES indices won't be updated "live" with the current setup, continue below for instructions on how to hook Elasticsearch up to MySQL binlog. - -However, take note that binglog is not necessary for simple ES testing and development; you can simply run `import_to_es.py` from time to time to reindex all the torrents. +- Install [Kibana](https://www.elastic.co/products/kibana) as a search debug frontend for ES (*optional*) ### Enabling MySQL Binlogging - Edit your MariaDB/MySQL server configuration and add the following under `[mariadb]`: @@ -103,6 +101,18 @@ However, take note that binglog is not necessary for simple ES testing and devel - Verify that the result of `SHOW VARIABLES LIKE 'binlog_format';` is `ROW` - Execute `GRANT REPLICATION SLAVE ON *.* TO 'username'@'localhost';` to allow your configured user access to the binlog +### Setting up ES +- Run `./create_es.sh` to create the indices for the torrents: `nyaa` and `sukebei` + - The output should show `acknowledged: true` twice +- Stop the Nyaa app if you haven't already +- Run `python import_to_es.py` to import all the torrents (on nyaa and sukebei) into the ES indices. + - This may take some time to run if you have plenty of torrents in your database. + +Enable the `USE_ELASTIC_SEARCH` flag in `config.py` and (re)start the application. 
+Elasticsearch should now be functional! The ES indices won't be updated "live" with the current setup, continue below for instructions on how to hook Elasticsearch up to MySQL binlog. + +However, take note that binlog is not necessary for simple ES testing and development; you can simply run `import_to_es.py` from time to time to reindex all the torrents. + ### Setting up sync_es.py `sync_es.py` keeps the Elasticsearch indices updated by reading the binlog and pushing the changes to the ES indices. @@ -113,3 +123,16 @@ However, take note that binglog is not necessary for simple ES testing and devel You're done! The script should now be feeding updates from the database to Elasticsearch. Take note, however, that the specified ES index refresh interval is 30 seconds, which may feel like a long time on local development. Feel free to adjust it or [poke Elasticsearch yourself!](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-refresh.html) + +## License +This project is licensed under the GNU General Public License v3.0 (GPL-3.0). See the [LICENSE](LICENSE) file for more details. + +## Disclaimer +> [!CAUTION] +> **This project was created as a learning experience, and while it's a torrent tracker, I can't control how people choose to use it.** + +By using this software, you're agreeing to a few things: +- I'm not responsible for any legal issues that might come up from using this tracker, especially if it's used to share copyrighted content without permission. +- It's your responsibility to make sure you're following the laws in your area when using this software. + +**Please use this project wisely and stay on the right side of the law.** Happy coding! diff --git a/WSGI.py b/WSGI.py index 6e080f8..83a2987 100644 --- a/WSGI.py +++ b/WSGI.py @@ -1,11 +1,16 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # -*- coding: utf-8 -*- +""" +WSGI entry point for the Nyaa application. +Compatible with Python 3.13. 
+""" import gevent.monkey gevent.monkey.patch_all() from nyaa import create_app +from flask import Flask -app = create_app('config') +app: Flask = create_app('config') if app.config['DEBUG']: from werkzeug.debug import DebuggedApplication diff --git a/config.example.py b/config.example.py index da87a9e..014e0e8 100644 --- a/config.example.py +++ b/config.example.py @@ -1,4 +1,5 @@ import os +import re DEBUG = True @@ -13,6 +14,16 @@ MAINTENANCE_MODE_MESSAGE = 'Site is currently in read-only maintenance mode.' # Allow logging in during maintenance (without updating last login date) MAINTENANCE_MODE_LOGINS = True +# Block *anonymous* uploads completely +RAID_MODE_LIMIT_UPLOADS = False +# Message prepended to the full error message (account.py) +RAID_MODE_UPLOADS_MESSAGE = 'Anonymous uploads are currently disabled.' + +# Require manual activation for newly registered accounts +RAID_MODE_LIMIT_REGISTER = False +# Message prepended to the full error message (account.py) +RAID_MODE_REGISTER_MESSAGE = 'Registration is currently being limited.' + ############# ## General ## ############# @@ -31,6 +42,12 @@ EXTERNAL_URLS = {'fap':'***', 'main':'***'} CSRF_SESSION_KEY = '***' SECRET_KEY = '***' +# Session cookie configuration +SESSION_COOKIE_NAME = 'nyaav3_session' +SESSION_COOKIE_SECURE = True +SESSION_COOKIE_HTTPONLY = True +SESSION_COOKIE_SAMESITE = 'Lax' + # Present a recaptcha for anonymous uploaders USE_RECAPTCHA = False # Require email validation @@ -44,13 +61,34 @@ ENABLE_SHOW_STATS = True # Depends on email support! ALLOW_PASSWORD_RESET = True +# A list of strings or compiled regexes to deny registering emails by. +# Regexes will be .search()'d against emails, +# while strings will be a simple 'string in email.lower()' check. +# Leave empty to disable the blacklist. +EMAIL_BLACKLIST = ( + # Hotmail completely rejects "untrusted" emails, + # so it's less of a headache to blacklist them as users can't receive the mails anyway. 
+ # (Hopefully) complete list of Microsoft email domains follows: + re.compile(r'(?i)@hotmail\.(co|co\.uk|com|de|dk|eu|fr|it|net|org|se)'), + re.compile(r'(?i)@live\.(co|co.uk|com|de|dk|eu|fr|it|net|org|se|no)'), + re.compile(r'(?i)@outlook\.(at|be|cl|co|co\.(id|il|nz|th)|com|com\.(ar|au|au|br|gr|pe|tr|vn)|cz|de|de|dk|dk|es|eu|fr|fr|hu|ie|in|it|it|jp|kr|lv|my|org|ph|pt|sa|se|sg|sk)'), + re.compile(r'(?i)@(msn\.com|passport\.(com|net))'), + # '@dodgydomain.tk' +) +EMAIL_SERVER_BLACKLIST = ( + # Bad mailserver IPs here (MX server.com -> A mail.server.com > 11.22.33.44) + # '1.2.3.4', '11.22.33.44' +) + + + # Recaptcha keys (https://www.google.com/recaptcha) RECAPTCHA_PUBLIC_KEY = '***' RECAPTCHA_PRIVATE_KEY = '***' BASE_DIR = os.path.abspath(os.path.dirname(__file__)) if USE_MYSQL: - SQLALCHEMY_DATABASE_URI = ('mysql://test:test123@localhost/nyaav2?charset=utf8mb4') + SQLALCHEMY_DATABASE_URI = ('mysql://test:test123@localhost/nyaav3?charset=utf8mb4') else: SQLALCHEMY_DATABASE_URI = ( 'sqlite:///' + os.path.join(BASE_DIR, 'test.db') + '?check_same_thread=False') @@ -107,6 +145,10 @@ MINIMUM_ANONYMOUS_TORRENT_SIZE = 1 * 1024 * 1024 # Relies on USE_RECAPTCHA. Set to 0 to disable. ACCOUNT_RECAPTCHA_AGE = 7 * 24 * 3600 # A week +# Seconds after which an IP is allowed to register another account +# (0 disables the limitation) +PER_IP_ACCOUNT_COOLDOWN = 24 * 3600 + # Backup original .torrent uploads BACKUP_TORRENT_FOLDER = 'torrents' @@ -117,6 +159,16 @@ BACKUP_TORRENT_FOLDER = 'torrents' # How many results should a page contain. Applies to RSS as well. RESULTS_PER_PAGE = 75 +# How many pages we'll return at most +MAX_PAGES = 100 + +# How long and how many entries to cache for count queries +COUNT_CACHE_SIZE = 256 +COUNT_CACHE_DURATION = 30 + +# Use baked queries for database search +USE_BAKED_SEARCH = False + # Use better searching with ElasticSearch # See README.MD on setup! 
USE_ELASTIC_SEARCH = False @@ -127,6 +179,8 @@ ENABLE_ELASTIC_SEARCH_HIGHLIGHT = False ES_MAX_SEARCH_RESULT = 1000 # ES index name generally (nyaa or sukebei) ES_INDEX_NAME = SITE_FLAVOR +# ES hosts +ES_HOSTS = ['localhost:9200'] ################ ## Commenting ## @@ -135,3 +189,48 @@ ES_INDEX_NAME = SITE_FLAVOR # Time limit for editing a comment after it has been posted (seconds) # Set to 0 to disable EDITING_TIME_LIMIT = 0 + +# Whether to use Gravatar or just always use the default avatar +# (Useful if run as development instance behind NAT/firewall) +ENABLE_GRAVATAR = True + +########################## +## Trusted Requirements ## +########################## + +# Minimum number of uploads the user needs to have in order to apply for trusted +TRUSTED_MIN_UPLOADS = 10 +# Minimum number of cumulative downloads the user needs to have across their +# torrents in order to apply for trusted +TRUSTED_MIN_DOWNLOADS = 10000 +# Number of days an applicant needs to wait before re-applying +TRUSTED_REAPPLY_COOLDOWN = 90 + +########### +## Cache ## +########### + +# Interesting types include "simple", "redis" and "uwsgi" +# See https://pythonhosted.org/Flask-Caching/#configuring-flask-caching +CACHE_TYPE = "simple" + +# Maximum number of items the cache will store +# Only applies to "simple" and "filesystem" cache types +CACHE_THRESHOLD = 8192 + +# If you want to use redis, try this +# CACHE_TYPE = "redis" +# CACHE_REDIS_HOST = "127.0.0.1" +# CACHE_KEY_PREFIX = "catcache_" + + +############### +## Ratelimit ## +############### + +# To actually make this work across multiple worker processes, use redis +# RATELIMIT_STORAGE_URL="redis://host:port" +RATELIMIT_KEY_PREFIX="nyaaratelimit_" + +# Use this to show the commit hash in the footer (see layout.html) +# COMMIT_HASH="[enter your commit hash here]"; \ No newline at end of file diff --git a/create_es.sh b/create_es.sh index 5b0c564..42aaa7a 100755 --- a/create_es.sh +++ b/create_es.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash +set -e 
-# create indicies named "nyaa" and "sukebei", these are hardcoded +# create indices named "nyaa" and "sukebei", these are hardcoded curl -v -XPUT 'localhost:9200/nyaa?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml curl -v -XPUT 'localhost:9200/sukebei?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml diff --git a/db_create.py b/db_create.py index 30fe4fe..0eaad9f 100755 --- a/db_create.py +++ b/db_create.py @@ -1,12 +1,19 @@ #!/usr/bin/env python3 +""" +Database creation script for Nyaa. +Compatible with Python 3.13 and SQLAlchemy 2.0. +""" +from typing import List, Tuple, Type + import sqlalchemy +from sqlalchemy import select from nyaa import create_app, models from nyaa.extensions import db app = create_app('config') -NYAA_CATEGORIES = [ +NYAA_CATEGORIES: List[Tuple[str, List[str]]] = [ ('Anime', ['Anime Music Video', 'English-translated', 'Non-English-translated', 'Raw']), ('Audio', ['Lossless', 'Lossy']), ('Literature', ['English-translated', 'Non-English-translated', 'Raw']), @@ -16,13 +23,23 @@ NYAA_CATEGORIES = [ ] -SUKEBEI_CATEGORIES = [ +SUKEBEI_CATEGORIES: List[Tuple[str, List[str]]] = [ ('Art', ['Anime', 'Doujinshi', 'Games', 'Manga', 'Pictures']), ('Real Life', ['Photobooks / Pictures', 'Videos']), ] -def add_categories(categories, main_class, sub_class): +def add_categories(categories: List[Tuple[str, List[str]]], + main_class: Type[models.MainCategoryBase], + sub_class: Type[models.SubCategoryBase]) -> None: + """ + Add categories to the database. 
+ + Args: + categories: List of tuples containing main category name and list of subcategory names + main_class: Main category model class + sub_class: Subcategory model class + """ for main_cat_name, sub_cat_names in categories: main_cat = main_class(name=main_cat_name) for i, sub_cat_name in enumerate(sub_cat_names): @@ -36,19 +53,24 @@ if __name__ == '__main__': # Test for the user table, assume db is empty if it's not created database_empty = False try: - models.User.query.first() + stmt = select(models.User).limit(1) + db.session.execute(stmt).scalar_one_or_none() except (sqlalchemy.exc.ProgrammingError, sqlalchemy.exc.OperationalError): database_empty = True print('Creating all tables...') db.create_all() - nyaa_category_test = models.NyaaMainCategory.query.first() + # Check if Nyaa categories exist + stmt = select(models.NyaaMainCategory).limit(1) + nyaa_category_test = db.session.execute(stmt).scalar_one_or_none() if not nyaa_category_test: print('Adding Nyaa categories...') add_categories(NYAA_CATEGORIES, models.NyaaMainCategory, models.NyaaSubCategory) - sukebei_category_test = models.SukebeiMainCategory.query.first() + # Check if Sukebei categories exist + stmt = select(models.SukebeiMainCategory).limit(1) + sukebei_category_test = db.session.execute(stmt).scalar_one_or_none() if not sukebei_category_test: print('Adding Sukebei categories...') add_categories(SUKEBEI_CATEGORIES, models.SukebeiMainCategory, models.SukebeiSubCategory) diff --git a/db_migrate.py b/db_migrate.py index 92789f6..928ea1d 100755 --- a/db_migrate.py +++ b/db_migrate.py @@ -1,9 +1,14 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +""" +Database migration script for Nyaa. +Compatible with Python 3.13 and Flask-Migrate 4.0. 
+""" import sys +from typing import List -from flask_script import Manager -from flask_migrate import Migrate, MigrateCommand +from flask_migrate import Migrate +from flask.cli import FlaskGroup from nyaa import create_app from nyaa.extensions import db @@ -11,11 +16,17 @@ from nyaa.extensions import db app = create_app('config') migrate = Migrate(app, db) -manager = Manager(app) -manager.add_command("db", MigrateCommand) +def create_cli_app(): + return app + +cli = FlaskGroup(create_app=create_cli_app) if __name__ == "__main__": # Patch sys.argv to default to 'db' - sys.argv.insert(1, 'db') + if len(sys.argv) > 1 and sys.argv[1] not in ['--help', '-h']: + if sys.argv[1] not in ['db', 'routes', 'shell', 'run']: + args: List[str] = sys.argv.copy() + args.insert(1, 'db') + sys.argv = args - manager.run() + cli() diff --git a/dev.py b/dev.py index 7560784..d223370 100755 --- a/dev.py +++ b/dev.py @@ -4,8 +4,11 @@ This tool is designed to assist developers run common tasks, such as checking the code for lint issues, auto fixing some lint issues and running tests. It imports modules lazily (as-needed basis), so it runs faster! + +Compatible with Python 3.13. """ import sys +from typing import List, Optional, Generator, Any, Union LINT_PATHS = [ 'nyaa/', @@ -14,14 +17,14 @@ LINT_PATHS = [ TEST_PATHS = ['tests'] -def print_cmd(cmd, args): +def print_cmd(cmd: str, args: List[str]) -> None: """ Prints the command and args as you would run them manually. """ print('Running: {0}\n'.format( ' '.join([('\'' + a + '\'' if ' ' in a else a) for a in [cmd] + args]))) sys.stdout.flush() # Make sure stdout is flushed before continuing. -def check_config_values(): +def check_config_values() -> bool: """ Verify that all max_line_length values match. 
""" import configparser config = configparser.ConfigParser() @@ -32,7 +35,7 @@ def check_config_values(): autopep8 = config.get('pycodestyle', 'max_line_length', fallback=None) isort = config.get('isort', 'line_length', fallback=None) - values = (v for v in (flake8, autopep8, isort) if v is not None) + values: Generator[str, None, None] = (v for v in (flake8, autopep8, isort) if v is not None) found = next(values, False) if not found: print('Warning: No max line length setting set in setup.cfg.') @@ -44,7 +47,7 @@ def check_config_values(): return True -def print_help(): +def print_help() -> int: print('Nyaa Development Helper') print('=======================\n') print('Usage: {0} command [different arguments]'.format(sys.argv[0])) @@ -62,7 +65,7 @@ def print_help(): if __name__ == '__main__': - assert sys.version_info >= (3, 6), "Python 3.6 is required" + assert sys.version_info >= (3, 13), "Python 3.13 is required" check_config_values() diff --git a/es_mapping.yml b/es_mapping.yml index 4d4e39b..28462f6 100644 --- a/es_mapping.yml +++ b/es_mapping.yml @@ -10,7 +10,6 @@ settings: char_filter: - my_char_filter filter: - - standard - lowercase my_index_analyzer: type: custom @@ -20,9 +19,15 @@ settings: filter: - resolution - lowercase - - my_ngram - word_delimit + - my_ngram - trim_zero + - unique + # For exact matching - separate each character for substring matching + lowercase + exact_analyzer: + tokenizer: exact_tokenizer + filter: + - lowercase # For matching full words longer than the ngram limit (15 chars) my_fullword_index_analyzer: type: custom @@ -32,13 +37,27 @@ settings: filter: - lowercase - word_delimit - # These should be enough, as my_index_analyzer will match the rest + # Skip tokens shorter than N characters, + # since they're already indexed in the main field + - fullword_min + - unique + + tokenizer: + # Splits input into characters, for exact substring matching + exact_tokenizer: + type: pattern + pattern: "(.)" + group: 1 filter: my_ngram: - 
type: edgeNGram + type: edge_ngram min_gram: 1 max_gram: 15 + fullword_min: + type: length + # Remember to change this if you change the max_gram below! + min: 16 resolution: type: pattern_capture patterns: ["(\\d+)[xX](\\d+)"] @@ -46,9 +65,13 @@ settings: type: pattern_capture patterns: ["0*([0-9]*)"] word_delimit: - type: word_delimiter + type: word_delimiter_graph preserve_original: true split_on_numerics: false + # https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-word-delimiter-graph-tokenfilter.html#word-delimiter-graph-tokenfilter-configure-parms + # since we're using "trim" filters downstream, otherwise + # you get weird lucene errors about startOffset + adjust_offsets: false char_filter: my_char_filter: type: mapping @@ -58,66 +81,65 @@ settings: # plus replicas don't really help either. number_of_shards: 1 number_of_replicas : 0 - mapper: - # disable elasticsearch's "helpful" autoschema - dynamic: false - # since we disabled the _all field, default query the - # name of the torrent. query: default_field: display_name mappings: - torrent: - # don't want everything concatenated - _all: - enabled: false - properties: - id: - type: long - display_name: - # TODO could do a fancier tokenizer here to parse out the - # the scene convention of stuff in brackets, plus stuff like k-on - type: text - analyzer: my_index_analyzer - fielddata: true # Is this required? 
- fields: - # Multi-field for full-word matching (when going over ngram limits) - # Note: will have to be queried for, not automatic - fullword: - type: text - analyzer: my_fullword_index_analyzer - created_time: - type: date - # Only in the ES index for generating magnet links - info_hash: - enabled: false - filesize: - type: long - anonymous: - type: boolean - trusted: - type: boolean - remake: - type: boolean - complete: - type: boolean - hidden: - type: boolean - deleted: - type: boolean - has_torrent: - type: boolean - download_count: - type: long - leech_count: - type: long - seed_count: - type: long - comment_count: - type: long - # these ids are really only for filtering, thus keyword - uploader_id: - type: keyword - main_category_id: - type: keyword - sub_category_id: - type: keyword \ No newline at end of file + # disable elasticsearch's "helpful" autoschema + dynamic: false + properties: + id: + type: long + display_name: + # TODO could do a fancier tokenizer here to parse out the + # the scene convention of stuff in brackets, plus stuff like k-on + type: text + analyzer: my_index_analyzer + fielddata: true # Is this required? 
+      fields: +        # Multi-field for full-word matching (when going over ngram limits) +        # Note: will have to be queried for, not automatic +        fullword: +          type: text +          analyzer: my_fullword_index_analyzer +        # Stored for exact phrase matching +        exact: +          type: text +          analyzer: exact_analyzer +      created_time: +        type: date +      # +      # Only in the ES index for generating magnet links +      info_hash: +        type: keyword +        index: false +      filesize: +        type: long +      anonymous: +        type: boolean +      trusted: +        type: boolean +      remake: +        type: boolean +      complete: +        type: boolean +      hidden: +        type: boolean +      deleted: +        type: boolean +      has_torrent: +        type: boolean +      download_count: +        type: long +      leech_count: +        type: long +      seed_count: +        type: long +      comment_count: +        type: long +      # these ids are really only for filtering, thus keyword +      uploader_id: +        type: keyword +      main_category_id: +        type: keyword +      sub_category_id: +        type: keyword diff --git a/es_sync_config.example.json b/es_sync_config.example.json index b2dc524..658c101 100644 --- a/es_sync_config.example.json +++ b/es_sync_config.example.json @@ -4,7 +4,7 @@ "mysql_port": 3306, "mysql_user": "nyaa", "mysql_password": "some_password", - "database": "nyaav2", + "database": "nyaav3", "internal_queue_depth": 10000, "es_chunk_size": 10000, "flush_interval": 5 diff --git a/import_to_es.py b/import_to_es.py index ee1ec72..fbd25b2 100755 --- a/import_to_es.py +++ b/import_to_es.py @@ -1,6 +1,6 @@ #!/usr/bin/env python """ -Bulk load torents from mysql into elasticsearch `nyaav2` index, +Bulk load torrents from mysql into elasticsearch `nyaav3` index,
This is a one-shot deal, so you'd either need to complement it with a cron job or some binlog-reading thing (TODO) @@ -18,9 +18,12 @@ from nyaa import create_app, models from nyaa.extensions import db app = create_app('config') -es = Elasticsearch(timeout=30) +es = Elasticsearch(hosts=app.config['ES_HOSTS'], timeout=30) ic = IndicesClient(es) +def pad_bytes(in_bytes, size): + return in_bytes + (b'\x00' * max(0, size - len(in_bytes))) + # turn into thing that elasticsearch indexes. We flatten in # the stats (seeders/leechers) so we can order by them in es naturally. # we _don't_ dereference uploader_id to the user's display name however, @@ -31,7 +34,6 @@ ic = IndicesClient(es) def mk_es(t, index_name): return { "_id": t.id, - "_type": "torrent", "_index": index_name, "_source": { # we're also indexing the id as a number so you can @@ -42,7 +44,7 @@ def mk_es(t, index_name): "created_time": t.created_time, # not analyzed but included so we can render magnet links # without querying sql again. - "info_hash": t.info_hash.hex(), + "info_hash": pad_bytes(t.info_hash, 20).hex(), "filesize": t.filesize, "uploader_id": t.uploader_id, "main_category_id": t.main_category_id, diff --git a/info_dicts/.gitignore b/info_dicts/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/info_dicts/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/migrations/README b/migrations/README deleted file mode 100644 index 98e4f9c..0000000 --- a/migrations/README +++ /dev/null @@ -1 +0,0 @@ -Generic single-database configuration. \ No newline at end of file diff --git a/migrations/README.md b/migrations/README.md new file mode 100644 index 0000000..e9c5557 --- /dev/null +++ b/migrations/README.md @@ -0,0 +1,4 @@ +> [!WARNING] +> No longer supported in NyaaV3. + +Generic single-database configuration. 
\ No newline at end of file diff --git a/migrations/versions/5cbcee17bece_add_trusted_applications.py b/migrations/versions/5cbcee17bece_add_trusted_applications.py new file mode 100644 index 0000000..29167cb --- /dev/null +++ b/migrations/versions/5cbcee17bece_add_trusted_applications.py @@ -0,0 +1,47 @@ +"""Add trusted applications + +Revision ID: 5cbcee17bece +Revises: 8a6a7662eb37 +Create Date: 2018-11-05 15:16:07.497898 + +""" +from alembic import op +import sqlalchemy as sa +import sqlalchemy_utils + + +# revision identifiers, used by Alembic. +revision = '5cbcee17bece' +down_revision = '8a6a7662eb37' +branch_labels = None +depends_on = None + + +def upgrade(): + op.create_table('trusted_applications', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('submitter_id', sa.Integer(), nullable=False, index=True), + sa.Column('created_time', sa.DateTime(), nullable=True), + sa.Column('closed_time', sa.DateTime(), nullable=True), + sa.Column('why_want', sa.String(length=4000), nullable=False), + sa.Column('why_give', sa.String(length=4000), nullable=False), + sa.Column('status', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['submitter_id'], ['users.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('trusted_reviews', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('reviewer_id', sa.Integer(), nullable=False), + sa.Column('app_id', sa.Integer(), nullable=False), + sa.Column('created_time', sa.DateTime(), nullable=True), + sa.Column('comment', sa.String(length=4000), nullable=False), + sa.Column('recommendation', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['app_id'], ['trusted_applications.id'], ), + sa.ForeignKeyConstraint(['reviewer_id'], ['users.id'], ), + sa.PrimaryKeyConstraint('id') + ) + + +def downgrade(): + op.drop_table('trusted_reviews') + op.drop_table('trusted_applications') diff --git a/migrations/versions/6cc823948c5a_add_trackerapi.py b/migrations/versions/6cc823948c5a_add_trackerapi.py new 
file mode 100644 index 0000000..d40e954 --- /dev/null +++ b/migrations/versions/6cc823948c5a_add_trackerapi.py @@ -0,0 +1,40 @@ +"""Add trackerapi table + +Revision ID: 6cc823948c5a +Revises: b61e4f6a88cc +Create Date: 2018-02-11 20:57:15.244171 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '6cc823948c5a' +down_revision = 'b61e4f6a88cc' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('nyaa_trackerapi', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('info_hash', sa.BINARY(length=20), nullable=False), + sa.Column('method', sa.String(length=255), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('sukebei_trackerapi', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('info_hash', sa.BINARY(length=20), nullable=False), + sa.Column('method', sa.String(length=255), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('sukebei_trackerapi') + op.drop_table('nyaa_trackerapi') + # ### end Alembic commands ### diff --git a/migrations/versions/8a6a7662eb37_add_user_preferences_table.py b/migrations/versions/8a6a7662eb37_add_user_preferences_table.py new file mode 100644 index 0000000..48ca434 --- /dev/null +++ b/migrations/versions/8a6a7662eb37_add_user_preferences_table.py @@ -0,0 +1,39 @@ +"""Add user preferences table + +Revision ID: 8a6a7662eb37 +Revises: f703f911d4ae +Create Date: 2018-11-20 17:02:26.408532 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '8a6a7662eb37' +down_revision = 'f703f911d4ae' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table('user_preferences', + sa.Column('user_id', sa.Integer(), nullable=False), + sa.Column('hide_comments', sa.Boolean(), server_default=sa.sql.expression.false(), nullable=False), + sa.ForeignKeyConstraint(['user_id'], ['users.id'], ondelete='CASCADE'), + sa.PrimaryKeyConstraint('user_id') + ) + + connection = op.get_bind() + + print('Populating user_preferences...') + connection.execute(sa.sql.text('INSERT INTO user_preferences (user_id) SELECT id FROM users')) + print('Done.') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('user_preferences') + # ### end Alembic commands ### diff --git a/migrations/versions/b61e4f6a88cc_del_torrents_info.py b/migrations/versions/b61e4f6a88cc_del_torrents_info.py new file mode 100644 index 0000000..e3bd4de --- /dev/null +++ b/migrations/versions/b61e4f6a88cc_del_torrents_info.py @@ -0,0 +1,57 @@ +"""Remove bencoded info dicts from mysql + +Revision ID: b61e4f6a88cc +Revises: cf7bf6d0e6bd +Create Date: 2017-08-29 01:45:08.357936 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import mysql +import sys + +# revision identifiers, used by Alembic. 
+revision = 'b61e4f6a88cc' +down_revision = 'cf7bf6d0e6bd' +branch_labels = None +depends_on = None + + +def upgrade(): + print("--- WARNING ---") + print("This migration drops the torrent_info tables.") + print("You will lose all of your .torrent files if you have not converted them beforehand.") + print("Use the migration script at utils/infodict_mysql2file.py") + print("Type OKAY and hit Enter to continue, CTRL-C to abort.") + print("--- WARNING ---") + try: + if input() != "OKAY": + sys.exit(1) + except KeyboardInterrupt: + sys.exit(1) + + op.drop_table('sukebei_torrents_info') + op.drop_table('nyaa_torrents_info') + + +def downgrade(): + op.create_table('nyaa_torrents_info', + sa.Column('info_dict', mysql.MEDIUMBLOB(), nullable=True), + sa.Column('torrent_id', mysql.INTEGER(display_width=11), autoincrement=False, nullable=False), + sa.ForeignKeyConstraint(['torrent_id'], ['nyaa_torrents.id'], name='nyaa_torrents_info_ibfk_1', ondelete='CASCADE'), + sa.PrimaryKeyConstraint('torrent_id'), + mysql_collate='utf8_bin', + mysql_default_charset='utf8', + mysql_engine='InnoDB', + mysql_row_format='COMPRESSED' + ) + op.create_table('sukebei_torrents_info', + sa.Column('info_dict', mysql.MEDIUMBLOB(), nullable=True), + sa.Column('torrent_id', mysql.INTEGER(display_width=11), autoincrement=False, nullable=False), + sa.ForeignKeyConstraint(['torrent_id'], ['sukebei_torrents.id'], name='sukebei_torrents_info_ibfk_1', ondelete='CASCADE'), + sa.PrimaryKeyConstraint('torrent_id'), + mysql_collate='utf8_bin', + mysql_default_charset='utf8', + mysql_engine='InnoDB', + mysql_row_format='COMPRESSED' + ) diff --git a/migrations/versions/f69d7fec88d6_add_rangebans.py b/migrations/versions/f69d7fec88d6_add_rangebans.py new file mode 100644 index 0000000..9011744 --- /dev/null +++ b/migrations/versions/f69d7fec88d6_add_rangebans.py @@ -0,0 +1,40 @@ +"""add rangebans + +Revision ID: f69d7fec88d6 +Revises: 6cc823948c5a +Create Date: 2018-06-01 14:01:49.596007 + +""" +from alembic 
import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'f69d7fec88d6' +down_revision = '6cc823948c5a' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('rangebans', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('cidr_string', sa.String(length=18), nullable=False), + sa.Column('masked_cidr', sa.BigInteger(), nullable=False), + sa.Column('mask', sa.BigInteger(), nullable=False), + sa.Column('enabled', sa.Boolean(), nullable=False), + sa.Column('temp', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_rangebans_mask'), 'rangebans', ['mask'], unique=False) + op.create_index(op.f('ix_rangebans_masked_cidr'), 'rangebans', ['masked_cidr'], unique=False) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index(op.f('ix_rangebans_masked_cidr'), table_name='rangebans') + op.drop_index(op.f('ix_rangebans_mask'), table_name='rangebans') + op.drop_table('rangebans') + # ### end Alembic commands ### diff --git a/migrations/versions/f703f911d4ae_add_registration_ip.py b/migrations/versions/f703f911d4ae_add_registration_ip.py new file mode 100644 index 0000000..784415d --- /dev/null +++ b/migrations/versions/f703f911d4ae_add_registration_ip.py @@ -0,0 +1,28 @@ +"""add registration IP + +Revision ID: f703f911d4ae +Revises: f69d7fec88d6 +Create Date: 2018-07-09 13:04:50.652781 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'f703f911d4ae' +down_revision = 'f69d7fec88d6' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.add_column('users', sa.Column('registration_ip', sa.Binary(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('users', 'registration_ip') + # ### end Alembic commands ### diff --git a/nyaa/__init__.py b/nyaa/__init__.py index f2d33fc..e62ec22 100644 --- a/nyaa/__init__.py +++ b/nyaa/__init__.py @@ -1,22 +1,36 @@ import logging import os import string +from typing import Any, Optional import flask +from flask import Flask from flask_assets import Bundle # noqa F401 from nyaa.api_handler import api_blueprint -from nyaa.extensions import assets, db, fix_paginate, toolbar +from nyaa.extensions import assets, cache, db, fix_paginate, limiter, toolbar from nyaa.template_utils import bp as template_utils_bp +from nyaa.template_utils import caching_url_for from nyaa.utils import random_string from nyaa.views import register_views +# Replace the Flask url_for with our cached version, since there's no real harm in doing so +# (caching_url_for has stored a reference to the OG url_for, so we won't recurse) +# Touching globals like this is a bit dirty, but nicer than replacing every url_for usage +flask.url_for = caching_url_for -def create_app(config): + +def create_app(config: Any) -> Flask: """ Nyaa app factory """ app = flask.Flask(__name__) app.config.from_object(config) + # Session cookie configuration + app.config['SESSION_COOKIE_NAME'] = 'nyaav3_session' + app.config['SESSION_COOKIE_SECURE'] = True + app.config['SESSION_COOKIE_HTTPONLY'] = True + app.config['SESSION_COOKIE_SAMESITE'] = 'Lax' + # Don't refresh cookie each request app.config['SESSION_REFRESH_EACH_REQUEST'] = False @@ -28,11 +42,24 @@ def create_app(config): # Forbid caching @app.after_request - def forbid_cache(request): - request.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate, max-age=0' - request.headers['Pragma'] = 'no-cache' - request.headers['Expires'] = '0' - return 
request + def forbid_cache(response: flask.Response) -> flask.Response: + response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate, max-age=0' + response.headers['Pragma'] = 'no-cache' + response.headers['Expires'] = '0' + return response + + # Add a timer header to the requests when debugging + # This gives us a simple way to benchmark requests off-app + import time + + @app.before_request + def timer_before_request() -> None: + flask.g.request_start_time = time.time() + + @app.after_request + def timer_after_request(response: flask.Response) -> flask.Response: + response.headers['X-Timer'] = str(time.time() - flask.g.request_start_time) + return response else: app.logger.setLevel(logging.WARNING) @@ -44,17 +71,17 @@ def create_app(config): app.config['LOG_FILE'], maxBytes=10000, backupCount=1) app.logger.addHandler(app.log_handler) - # Log errors and display a message to the user in production mdode + # Log errors and display a message to the user in production mode if not app.config['DEBUG']: @app.errorhandler(500) - def internal_error(exception): + def internal_error(exception: Exception) -> flask.Response: random_id = random_string(8, string.ascii_uppercase + string.digits) # Pst. Not actually unique, but don't tell anyone! - app.logger.error('Exception occurred! Unique ID: %s', random_id, exc_info=exception) + app.logger.error(f'Exception occurred! 
Unique ID: {random_id}', exc_info=exception) markup_source = ' '.join([ 'An error occurred!', 'Debug information has been logged.', - 'Please pass along this ID: {}'.format(random_id) + f'Please pass along this ID: {random_id}' ]) flask.flash(flask.Markup(markup_source), 'danger') @@ -73,14 +100,29 @@ def create_app(config): app.jinja_env.lstrip_blocks = True app.jinja_env.trim_blocks = True + # The default jinja_env has the OG Flask url_for (from before we replaced it), + # so update the globals with our version + app.jinja_env.globals['url_for'] = flask.url_for + # Database fix_paginate() # This has to be before the database is initialized app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False app.config['MYSQL_DATABASE_CHARSET'] = 'utf8mb4' db.init_app(app) + + # Import the fixed Ban.banned method + with app.app_context(): + import nyaa.fixed_ban # Assets assets.init_app(app) + if hasattr(assets, '_named_bundles'): + assets._named_bundles = {} # Hack to fix state carrying over in tests + main_js = Bundle('js/main.js', filters='rjsmin', output='js/main.min.js') + bs_js = Bundle('js/bootstrap-select.js', filters='rjsmin', + output='js/bootstrap-select.min.js') + assets.register('main_js', main_js) + assets.register('bs_js', bs_js) # css = Bundle('style.scss', filters='libsass', # output='style.css', depends='**/*.scss') # assets.register('style_all', css) @@ -90,4 +132,16 @@ def create_app(config): app.register_blueprint(api_blueprint) register_views(app) + # Pregenerate some URLs to avoid repeat url_for calls + if 'SERVER_NAME' in app.config and app.config['SERVER_NAME']: + with app.app_context(): + url = flask.url_for('static', filename='img/avatar/default.png', _external=True) + app.config['DEFAULT_GRAVATAR_URL'] = url + + # Cache + cache.init_app(app, config=app.config) + + # Rate Limiting, reads app.config itself + limiter.init_app(app) + return app diff --git a/nyaa/api_handler.py b/nyaa/api_handler.py index 10a8608..058b420 100644 --- a/nyaa/api_handler.py 
+++ b/nyaa/api_handler.py @@ -1,13 +1,11 @@ import binascii import functools import json -import os.path import re import flask -from nyaa import backend, bencode, forms, models, utils -from nyaa.extensions import db +from nyaa import backend, forms, models from nyaa.views.torrents import _create_upload_category_choices api_blueprint = flask.Blueprint('api', __name__, url_prefix='/api') @@ -120,142 +118,6 @@ def v2_api_upload(): return flask.jsonify({'errors': mapped_errors}), 400 -# #################################### TEMPORARY #################################### - -from orderedset import OrderedSet # noqa: E402 isort:skip - - -@api_blueprint.route('/ghetto_import', methods=['POST']) -def ghetto_import(): - if flask.request.remote_addr != '127.0.0.1': - return flask.error(403) - - torrent_file = flask.request.files.get('torrent') - - try: - torrent_dict = bencode.decode(torrent_file) - # field.data.close() - except (bencode.MalformedBencodeException, UnicodeError): - return 'Malformed torrent file', 500 - - try: - forms._validate_torrent_metadata(torrent_dict) - except AssertionError as e: - return 'Malformed torrent metadata ({})'.format(e.args[0]), 500 - - try: - tracker_found = forms._validate_trackers(torrent_dict) # noqa F841 - except AssertionError as e: - return 'Malformed torrent trackers ({})'.format(e.args[0]), 500 - - bencoded_info_dict = bencode.encode(torrent_dict['info']) - info_hash = utils.sha1_hash(bencoded_info_dict) - - # Check if the info_hash exists already in the database - torrent = models.Torrent.by_info_hash(info_hash) - if not torrent: - return 'This torrent does not exists', 500 - - if torrent.has_torrent: - return 'This torrent already has_torrent', 500 - - # Torrent is legit, pass original filename and dict along - torrent_data = forms.TorrentFileData(filename=os.path.basename(torrent_file.filename), - torrent_dict=torrent_dict, - info_hash=info_hash, - bencoded_info_dict=bencoded_info_dict) - - # The torrent has been validated and 
is safe to access with ['foo'] etc - all relevant - # keys and values have been checked for (see UploadForm in forms.py for details) - info_dict = torrent_data.torrent_dict['info'] - - changed_to_utf8 = backend._replace_utf8_values(torrent_data.torrent_dict) - - torrent_filesize = info_dict.get('length') or sum( - f['length'] for f in info_dict.get('files')) - - # In case no encoding, assume UTF-8. - torrent_encoding = torrent_data.torrent_dict.get('encoding', b'utf-8').decode('utf-8') - - # Store bencoded info_dict - torrent.info = models.TorrentInfo(info_dict=torrent_data.bencoded_info_dict) - torrent.has_torrent = True - - # To simplify parsing the filelist, turn single-file torrent into a list - torrent_filelist = info_dict.get('files') - - used_path_encoding = changed_to_utf8 and 'utf-8' or torrent_encoding - - parsed_file_tree = dict() - if not torrent_filelist: - # If single-file, the root will be the file-tree (no directory) - file_tree_root = parsed_file_tree - torrent_filelist = [{'length': torrent_filesize, 'path': [info_dict['name']]}] - else: - # If multi-file, use the directory name as root for files - file_tree_root = parsed_file_tree.setdefault( - info_dict['name'].decode(used_path_encoding), {}) - - # Parse file dicts into a tree - for file_dict in torrent_filelist: - # Decode path parts from utf8-bytes - path_parts = [path_part.decode(used_path_encoding) for path_part in file_dict['path']] - - filename = path_parts.pop() - current_directory = file_tree_root - - for directory in path_parts: - current_directory = current_directory.setdefault(directory, {}) - - # Don't add empty filenames (BitComet directory) - if filename: - current_directory[filename] = file_dict['length'] - - parsed_file_tree = utils.sorted_pathdict(parsed_file_tree) - - json_bytes = json.dumps(parsed_file_tree, separators=(',', ':')).encode('utf8') - torrent.filelist = models.TorrentFilelist(filelist_blob=json_bytes) - - db.session.add(torrent) - db.session.flush() - - # Store 
the users trackers - trackers = OrderedSet() - announce = torrent_data.torrent_dict.get('announce', b'').decode('ascii') - if announce: - trackers.add(announce) - - # List of lists with single item - announce_list = torrent_data.torrent_dict.get('announce-list', []) - for announce in announce_list: - trackers.add(announce[0].decode('ascii')) - - # Remove our trackers, maybe? TODO ? - - # Search for/Add trackers in DB - db_trackers = OrderedSet() - for announce in trackers: - tracker = models.Trackers.by_uri(announce) - - # Insert new tracker if not found - if not tracker: - tracker = models.Trackers(uri=announce) - db.session.add(tracker) - db.session.flush() - - db_trackers.add(tracker) - - # Store tracker refs in DB - for order, tracker in enumerate(db_trackers): - torrent_tracker = models.TorrentTrackers(torrent_id=torrent.id, - tracker_id=tracker.id, order=order) - db.session.add(torrent_tracker) - - db.session.commit() - - return 'success' - - # ####################################### INFO ####################################### ID_PATTERN = '^[0-9]+$' INFO_HASH_PATTERN = '^[0-9a-fA-F]{40}$' # INFO_HASH as string diff --git a/nyaa/backend.py b/nyaa/backend.py index 523a8b6..c8bb0ac 100644 --- a/nyaa/backend.py +++ b/nyaa/backend.py @@ -1,21 +1,43 @@ import json import os +import re from datetime import datetime, timedelta from ipaddress import ip_address -from urllib.parse import urlencode -from urllib.request import urlopen import flask -from werkzeug import secure_filename +from werkzeug.utils import secure_filename import sqlalchemy -from orderedset import OrderedSet +from orderly_set import OrderlySet from nyaa import models, utils from nyaa.extensions import db app = flask.current_app +# Blacklists for _validate_torrent_filenames +# TODO: consider moving to config.py? 
+CHARACTER_BLACKLIST = [ + '\u202E', # RIGHT-TO-LEFT OVERRIDE +] +FILENAME_BLACKLIST = [ + # Windows reserved filenames + 'con', + 'nul', + 'prn', + 'aux', + 'com0', 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9', + 'lpt0', 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9', +] + +# Invalid RSS characters regex, used to sanitize some strings +ILLEGAL_XML_CHARS_RE = re.compile(u'[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]') + + +def sanitize_string(string, replacement='\uFFFD'): + ''' Simply replaces characters based on a regex ''' + return ILLEGAL_XML_CHARS_RE.sub(replacement, string) + class TorrentExtraValidationException(Exception): def __init__(self, errors={}): @@ -64,16 +86,14 @@ def _recursive_dict_iterator(source): def _validate_torrent_filenames(torrent): - ''' Checks path parts of a torrent's filetree against blacklisted characters, - returning False on rejection ''' - # TODO Move to config.py - character_blacklist = [ - '\u202E', # RIGHT-TO-LEFT OVERRIDE - ] + ''' Checks path parts of a torrent's filetree against blacklisted characters + and filenames, returning False on rejection ''' file_tree = json.loads(torrent.filelist.filelist_blob.decode('utf-8')) for path_part, value in _recursive_dict_iterator(file_tree): - if any(True for c in character_blacklist if c in path_part): + if path_part.rsplit('.', 1)[0].lower() in FILENAME_BLACKLIST: + return False + if any(True for c in CHARACTER_BLACKLIST if c in path_part): return False return True @@ -119,7 +139,9 @@ def check_uploader_ratelimit(user): def filter_uploader(query): if user: - return query.filter(Torrent.user == user) + return query.filter(sqlalchemy.or_( + Torrent.user == user, + Torrent.uploader_ip == ip_address(flask.request.remote_addr).packed)) else: return query.filter(Torrent.uploader_ip == ip_address(flask.request.remote_addr).packed) @@ -160,11 +182,23 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False): 
upload_form.ratelimit.errors = ["You've gone over the upload ratelimit."] raise TorrentExtraValidationException() - # Delete exisiting torrent which is marked as deleted + if not uploading_user: + if app.config['RAID_MODE_LIMIT_UPLOADS']: + # XXX TODO: rename rangebanned to something more generic + upload_form.rangebanned.errors = [app.config['RAID_MODE_UPLOADS_MESSAGE']] + raise TorrentExtraValidationException() + elif models.RangeBan.is_rangebanned(ip_address(flask.request.remote_addr).packed): + upload_form.rangebanned.errors = ["Your IP is banned from " + "uploading anonymously."] + raise TorrentExtraValidationException() + + # Delete existing torrent which is marked as deleted if torrent_data.db_id is not None: - models.Torrent.query.filter_by(id=torrent_data.db_id).delete() + old_torrent = models.Torrent.by_id(torrent_data.db_id) + db.session.delete(old_torrent) db.session.commit() - _delete_cached_torrent_file(torrent_data.db_id) + # Delete physical file after transaction has been committed + _delete_info_dict(old_torrent) # The torrent has been validated and is safe to access with ['foo'] etc - all relevant # keys and values have been checked for (see UploadForm in forms.py for details) @@ -177,6 +211,11 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False): information = (upload_form.information.data or '').strip() description = (upload_form.description.data or '').strip() + # Sanitize fields + display_name = sanitize_string(display_name) + information = sanitize_string(information) + description = sanitize_string(description) + torrent_filesize = info_dict.get('length') or sum( f['length'] for f in info_dict.get('files')) @@ -195,7 +234,14 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False): uploader_ip=ip_address(flask.request.remote_addr).packed) # Store bencoded info_dict - torrent.info = models.TorrentInfo(info_dict=torrent_data.bencoded_info_dict) + info_dict_path = torrent.info_dict_path + + 
info_dict_dir = os.path.dirname(info_dict_path) + os.makedirs(info_dict_dir, exist_ok=True) + + with open(info_dict_path, 'wb') as out_file: + out_file.write(torrent_data.bencoded_info_dict) + torrent.stats = models.Statistic() torrent.has_torrent = True @@ -211,6 +257,10 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False): # To do, automatically mark trusted if user is trusted unless user specifies otherwise torrent.trusted = upload_form.is_trusted.data if can_mark_trusted else False + # Only allow mods to upload locked torrents + can_mark_locked = uploading_user and uploading_user.is_moderator + torrent.comment_locked = upload_form.is_comment_locked.data if can_mark_locked else False + # Set category ids torrent.main_category_id, torrent.sub_category_id = \ upload_form.category.parsed_data.get_category_ids() @@ -254,7 +304,7 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False): db.session.flush() # Store the users trackers - trackers = OrderedSet() + trackers = OrderlySet() announce = torrent_data.torrent_dict.get('announce', b'').decode('ascii') if announce: trackers.add(announce) @@ -269,12 +319,12 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False): webseed_list = torrent_data.torrent_dict.get('url-list') or [] if isinstance(webseed_list, bytes): webseed_list = [webseed_list] # qB doesn't contain a sole url in a list - webseeds = OrderedSet(webseed.decode('utf-8') for webseed in webseed_list) + webseeds = OrderlySet(webseed.decode('utf-8') for webseed in webseed_list) # Remove our trackers, maybe? TODO ? 
# Search for/Add trackers in DB - db_trackers = OrderedSet() + db_trackers = OrderlySet() for announce in trackers: tracker = models.Trackers.by_uri(announce) @@ -313,6 +363,9 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False): # Before final commit, validate the torrent again validate_torrent_post_upload(torrent, upload_form) + # Add to tracker whitelist + db.session.add(models.TrackerApi(torrent.info_hash, 'insert')) + db.session.commit() # Store the actual torrent file as well @@ -321,8 +374,7 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False): torrent_file.seek(0, 0) torrent_dir = app.config['BACKUP_TORRENT_FOLDER'] - if not os.path.exists(torrent_dir): - os.makedirs(torrent_dir) + os.makedirs(torrent_dir, exist_ok=True) torrent_path = os.path.join(torrent_dir, '{}.{}'.format( torrent.id, secure_filename(torrent_file.filename))) @@ -332,38 +384,7 @@ def handle_torrent_upload(upload_form, uploading_user=None, fromAPI=False): return torrent -def tracker_api(info_hashes, method): - api_url = app.config.get('TRACKER_API_URL') - if not api_url: - return False - - # Split list into at most 100 elements - chunk_size = 100 - chunk_range = range(0, len(info_hashes), chunk_size) - chunked_info_hashes = (info_hashes[i:i + chunk_size] for i in chunk_range) - - for info_hashes_chunk in chunked_info_hashes: - qs = [ - ('auth', app.config.get('TRACKER_API_AUTH')), - ('method', method) - ] - - qs.extend(('info_hash', info_hash) for info_hash in info_hashes_chunk) - - api_url += '?' 
+ urlencode(qs) - try: - req = urlopen(api_url) - except: - return False - - if req.status != 200: - return False - return True - - -def _delete_cached_torrent_file(torrent_id): - # Note: obviously temporary - cached_torrent = os.path.join(app.config['BASE_DIR'], - 'torrent_cache', str(torrent_id) + '.torrent') - if os.path.exists(cached_torrent): - os.remove(cached_torrent) +def _delete_info_dict(torrent): + info_dict_path = torrent.info_dict_path + if os.path.exists(info_dict_path): + os.remove(info_dict_path) diff --git a/nyaa/bencode.py b/nyaa/bencode.py index 6cdd14c..1866dd1 100644 --- a/nyaa/bencode.py +++ b/nyaa/bencode.py @@ -67,7 +67,7 @@ def _bencode_decode(file_object, decode_keys_as_utf8=True): elif c == _B_END: try: return int(int_bytes.decode('utf8')) - except Exception as e: + except Exception: raise create_ex('Unable to parse int') # not a digit OR '-' in the middle of the int @@ -109,7 +109,7 @@ def _bencode_decode(file_object, decode_keys_as_utf8=True): raise create_ex('Unexpected input while reading string length: ' + repr(c)) try: str_len = int(str_len_bytes.decode()) - except Exception as e: + except Exception: raise create_ex('Unable to parse bytestring length') bytestring = file_object.read(str_len) diff --git a/nyaa/custom_pagination.py b/nyaa/custom_pagination.py new file mode 100644 index 0000000..f83be5d --- /dev/null +++ b/nyaa/custom_pagination.py @@ -0,0 +1,99 @@ +from typing import Any, List, Optional, Sequence, TypeVar, Union + +T = TypeVar('T') + +class CustomPagination: + """ + A custom pagination class that mimics the interface of Flask-SQLAlchemy's Pagination + but doesn't rely on the _query_items method. + """ + + def __init__(self, query: Any, page: int, per_page: int, total: int, items: List[T]): + """ + Initialize a new CustomPagination object. 
+ + Args: + query: The query object (not used, but kept for compatibility) + page: The current page number (1-indexed) + per_page: The number of items per page + total: The total number of items + items: The items on the current page + """ + self.query = query + self.page = page + self.per_page = per_page + self.total = total + self.items = items + + # For compatibility with LimitedPagination + self.actual_count = total + + @property + def has_prev(self) -> bool: + """Return True if there is a previous page.""" + return self.page > 1 + + @property + def has_next(self) -> bool: + """Return True if there is a next page.""" + return self.page < self.pages + + @property + def pages(self) -> int: + """The total number of pages.""" + if self.per_page == 0 or self.total == 0: + return 0 + return max(1, (self.total + self.per_page - 1) // self.per_page) + + @property + def prev_num(self) -> Optional[int]: + """The previous page number, or None if this is the first page.""" + if self.has_prev: + return self.page - 1 + return None + + @property + def next_num(self) -> Optional[int]: + """The next page number, or None if this is the last page.""" + if self.has_next: + return self.page + 1 + return None + + @property + def first(self) -> int: + """The number of the first item on the page, starting from 1, or 0 if there are no items.""" + if not self.items: + return 0 + return (self.page - 1) * self.per_page + 1 + + @property + def last(self) -> int: + """The number of the last item on the page, starting from 1, inclusive, or 0 if there are no items.""" + if not self.items: + return 0 + return min(self.total, self.page * self.per_page) + + def iter_pages(self, left_edge: int = 2, left_current: int = 2, + right_current: int = 5, right_edge: int = 2) -> Sequence[Optional[int]]: + """ + Yield page numbers for a pagination widget. + + Skipped pages between the edges and middle are represented by a None. 
+ """ + last = 0 + for num in range(1, self.pages + 1): + if (num <= left_edge or + (num > self.page - left_current - 1 and num < self.page + right_current) or + num > self.pages - right_edge): + if last + 1 != num: + yield None + yield num + last = num + + def __iter__(self): + """Iterate over the items on the current page.""" + return iter(self.items) + + def __len__(self): + """Return the number of items on the current page.""" + return len(self.items) diff --git a/nyaa/email.py b/nyaa/email.py index a74a631..f0a997c 100644 --- a/nyaa/email.py +++ b/nyaa/email.py @@ -45,29 +45,58 @@ class EmailHolder(object): def send_email(email_holder): + """Send an email using the configured mail backend.""" mail_backend = app.config.get('MAIL_BACKEND') - if mail_backend == 'mailgun': - _send_mailgun(email_holder) - elif mail_backend == 'smtp': - _send_smtp(email_holder) - elif mail_backend: - # TODO: Do this in logging.error when we have that set up - print('Unknown mail backend:', mail_backend) + + if not mail_backend: + app.logger.warning('No mail backend configured, skipping email send') + return False + + try: + if mail_backend == 'mailgun': + success = _send_mailgun(email_holder) + elif mail_backend == 'smtp': + success = _send_smtp(email_holder) + else: + app.logger.error(f'Unknown mail backend: {mail_backend}') + return False + + if not success: + app.logger.error(f'Failed to send email using {mail_backend} backend') + return False + + app.logger.info(f'Email successfully sent using {mail_backend} backend') + return True + + except Exception as e: + app.logger.error(f'Error sending email: {str(e)}') + return False def _send_mailgun(email_holder): - mailgun_endpoint = app.config['MAILGUN_API_BASE'] + '/messages' - auth = ('api', app.config['MAILGUN_API_KEY']) - data = { - 'from': app.config['MAIL_FROM_ADDRESS'], - 'to': email_holder.format_recipient(), - 'subject': email_holder.subject, - 'text': email_holder.text, - 'html': email_holder.html - } - r = 
requests.post(mailgun_endpoint, data=data, auth=auth) - # TODO real error handling? - assert r.status_code == 200 + """Send an email using Mailgun API with proper error handling.""" + try: + mailgun_endpoint = app.config['MAILGUN_API_BASE'] + '/messages' + auth = ('api', app.config['MAILGUN_API_KEY']) + data = { + 'from': app.config['MAIL_FROM_ADDRESS'], + 'to': email_holder.format_recipient(), + 'subject': email_holder.subject, + 'text': email_holder.text, + 'html': email_holder.html + } + + r = requests.post(mailgun_endpoint, data=data, auth=auth) + + if r.status_code != 200: + app.logger.error(f'Mailgun API error: {r.status_code} - {r.text}') + return False + + return True + + except Exception as e: + app.logger.error(f'Error sending email via Mailgun: {str(e)}') + return False def _send_smtp(email_holder): diff --git a/nyaa/extensions.py b/nyaa/extensions.py index 7abc26e..44753d6 100644 --- a/nyaa/extensions.py +++ b/nyaa/extensions.py @@ -1,19 +1,45 @@ import os.path +from typing import Any, Optional, Sequence, TypeVar, Union from flask import abort from flask.config import Config from flask_assets import Environment +from flask_caching import Cache from flask_debugtoolbar import DebugToolbarExtension -from flask_sqlalchemy import BaseQuery, Pagination, SQLAlchemy +from flask_limiter import Limiter +from flask_limiter.util import get_remote_address +from flask_sqlalchemy import SQLAlchemy +from flask_sqlalchemy.pagination import Pagination +from sqlalchemy.orm import Query assets = Environment() db = SQLAlchemy() toolbar = DebugToolbarExtension() +cache = Cache() +limiter = Limiter(key_func=get_remote_address) + +# Type variable for query results +T = TypeVar('T') -def fix_paginate(): +class LimitedPagination(Pagination): + def __init__(self, actual_count: int, *args: Any, **kwargs: Any) -> None: + self.actual_count = actual_count + super().__init__(*args, **kwargs) - def paginate_faste(self, page=1, per_page=50, max_page=None, step=5, count_query=None): + 
+def fix_paginate() -> None: + """Add custom pagination method to SQLAlchemy Query.""" + + def paginate_faste( + self: Query[T], + page: int = 1, + per_page: int = 50, + max_page: Optional[int] = None, + step: int = 5, + count_query: Optional[Query[int]] = None + ) -> LimitedPagination: + """Custom pagination that supports max_page and count_query.""" if page < 1: abort(404) @@ -25,6 +51,13 @@ def fix_paginate(): total_query_count = count_query.scalar() else: total_query_count = self.count() + + if total_query_count is None: + total_query_count = 0 + + actual_query_count = total_query_count + if max_page: + total_query_count = min(total_query_count, max_page * per_page) # Grab items on current page items = self.limit(per_page).offset((page - 1) * per_page).all() @@ -32,19 +65,23 @@ def fix_paginate(): if not items and page != 1: abort(404) - return Pagination(self, page, per_page, total_query_count, items) + return LimitedPagination(actual_query_count, self, page, per_page, total_query_count, + items) - BaseQuery.paginate_faste = paginate_faste + # Monkey patch the Query class + setattr(Query, 'paginate_faste', paginate_faste) -def _get_config(): - # Workaround to get an available config object before the app is initiallized - # Only needed/used in top-level and class statements - # https://stackoverflow.com/a/18138250/7597273 +def _get_config() -> Config: + """ + Workaround to get an available config object before the app is initialized. + Only needed/used in top-level and class statements. 
+ https://stackoverflow.com/a/18138250/7597273 + """ root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) - config = Config(root_path) - config.from_object('config') - return config + config_obj = Config(root_path) + config_obj.from_object('config') + return config_obj config = _get_config() diff --git a/nyaa/fixed_ban.py b/nyaa/fixed_ban.py new file mode 100644 index 0000000..016ea67 --- /dev/null +++ b/nyaa/fixed_ban.py @@ -0,0 +1,26 @@ +from typing import Optional, Union +from sqlalchemy import or_, select +from nyaa.extensions import db +from nyaa.models import Ban + +# Fix the banned method to return a query object instead of a list +@classmethod +def fixed_banned(cls, user_id: Optional[int], user_ip: Optional[bytes]): + """Check if a user or IP is banned. + + Returns a query object that can be further filtered or used with .first(), .all(), etc. + """ + if not user_id and not user_ip: + # Return an empty query that will return no results + return db.session.query(cls).filter(cls.id < 0) + + clauses = [] + if user_id: + clauses.append(cls.user_id == user_id) + if user_ip: + clauses.append(cls.user_ip == user_ip) + + return db.session.query(cls).filter(or_(*clauses)) + +# Replace the original method with our fixed version +Ban.banned = fixed_banned diff --git a/nyaa/forms.py b/nyaa/forms.py index 335f04a..ddf1526 100644 --- a/nyaa/forms.py +++ b/nyaa/forms.py @@ -11,8 +11,13 @@ from wtforms import (BooleanField, HiddenField, PasswordField, SelectField, Stri SubmitField, TextAreaField) from wtforms.validators import (DataRequired, Email, EqualTo, Length, Optional, Regexp, StopValidation, ValidationError) +# from wtforms.widgets import HTMLString # For DisabledSelectField +from markupsafe import Markup from wtforms.widgets import Select as SelectWidget # For DisabledSelectField -from wtforms.widgets import HTMLString, html_params # For DisabledSelectField +from wtforms.widgets import html_params + +import dns.exception +import dns.resolver 
from nyaa import bencode, models, utils from nyaa.extensions import config @@ -69,6 +74,59 @@ def upload_recaptcha_validator_shim(form, field): return True +def register_email_blacklist_validator(form, field): + email_blacklist = app.config.get('EMAIL_BLACKLIST', []) + email = field.data.strip() + validation_exception = StopValidation('Blacklisted email provider') + + for item in email_blacklist: + if isinstance(item, re.Pattern): + if item.search(email): + raise validation_exception + elif isinstance(item, str): + if item in email.lower(): + raise validation_exception + else: + raise Exception('Unexpected email validator type {!r} ({!r})'.format(type(item), item)) + return True + + +def register_email_server_validator(form, field): + server_blacklist = app.config.get('EMAIL_SERVER_BLACKLIST', []) + if not server_blacklist: + return True + + validation_exception = StopValidation('Blacklisted email provider') + email = field.data.strip() + email_domain = email.split('@', 1)[-1] + + try: + # Query domain MX records + mx_records = list(dns.resolver.query(email_domain, 'MX')) + + except dns.exception.DNSException: + app.logger.error('Unable to query MX records for email: %s - ignoring', + email, exc_info=False) + return True + + for mx_record in mx_records: + try: + # Query mailserver A records + a_records = list(dns.resolver.query(mx_record.exchange)) + for a_record in a_records: + # Check for address in blacklist + if a_record.address in server_blacklist: + app.logger.warning('Rejected email %s due to blacklisted mailserver (%s, %s)', + email, a_record.address, mx_record.exchange) + raise validation_exception + + except dns.exception.DNSException: + app.logger.warning('Failed to query A records for mailserver: %s (%s) - ignoring', + mx_record.exchange, email, exc_info=False) + + return True + + _username_validator = Regexp( r'^[a-zA-Z0-9_\-]+$', message='Your username must only consist of alphanumerics and _- (a-zA-Z0-9_-)') @@ -105,14 +163,16 @@ class 
RegisterForm(FlaskForm): DataRequired(), Length(min=3, max=32), stop_on_validation_error(_username_validator), - Unique(User, User.username, 'Username not availiable') + Unique(User, User.username, 'Username not available') ]) email = StringField('Email address', [ Email(), DataRequired(), Length(min=5, max=128), - Unique(User, User.email, 'Email already in use by another account') + register_email_blacklist_validator, + Unique(User, User.email, 'Email already in use by another account'), + register_email_server_validator ]) password = PasswordField('Password', [ @@ -146,6 +206,10 @@ class ProfileForm(FlaskForm): ]) password_confirm = PasswordField('Repeat New Password') + hide_comments = BooleanField('Hide comments by default') + + authorized_submit = SubmitField('Update') + submit_settings = SubmitField('Update') # Classes for a SelectField that can be set to disable options (id, name, disabled) @@ -160,7 +224,7 @@ class DisabledSelectWidget(SelectWidget): extra = disabled and {'disabled': ''} or {} html.append(self.render_option(val, label, selected, **extra)) html.append('') - return HTMLString(''.join(html)) + return Markup(''.join(html)) class DisabledSelectField(SelectField): @@ -187,6 +251,8 @@ class CommentForm(FlaskForm): DataRequired(message='Comment must not be empty.') ]) + recaptcha = RecaptchaField(validators=[upload_recaptcha_validator_shim]) + class InlineButtonWidget(object): """ @@ -200,7 +266,7 @@ class InlineButtonWidget(object): kwargs.setdefault('type', self.input_type) if not label: label = field.label.text - return HTMLString(' + + +{% endif %} diff --git a/nyaa/templates/infobubble_content.html b/nyaa/templates/infobubble_content.html new file mode 100644 index 0000000..735e86a --- /dev/null +++ b/nyaa/templates/infobubble_content.html @@ -0,0 +1 @@ +Put your announcements into infobubble_content.html! 
diff --git a/nyaa/templates/layout.html b/nyaa/templates/layout.html index 79c7b42..6021a05 100644 --- a/nyaa/templates/layout.html +++ b/nyaa/templates/layout.html @@ -4,7 +4,7 @@ {% block title %}{{ config.SITE_NAME }}{% endblock %} - + @@ -48,8 +48,12 @@ - - + {% assets "bs_js" %} + + {% endassets %} + {% assets "main_js" %} + + {% endassets %}