From 4fe13c1df3551b01353d191041cb856d8a2ec548 Mon Sep 17 00:00:00 2001 From: sb745 <201226723+sb745@users.noreply.github.com> Date: Mon, 22 Dec 2025 03:35:11 +0200 Subject: [PATCH] Fixed Elasticsearch --- README.md | 14 +++++++++----- config.example.py | 2 +- es_sync_config.example.json | 4 ++-- import_to_es.py | 8 +++++--- sync_es.py | 8 ++++---- 5 files changed, 21 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 24e497a..f1a8af0 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Running Nyaa on Windows may be possible, but it's currently unsupported. ### Major changes from NyaaV2 - Updated from Python 3.7 to Python 3.14 -- Updated all dependencies to their latest versions +- Updated all dependencies - Modernized code patterns for Flask 3.0 and SQLAlchemy 2.0 - Replaced deprecated Flask-Script, orderedset and `flask.Markup` with Flask CLI, orderly-set and markupsafe - Implemented mail error handling @@ -77,7 +77,7 @@ Continue below to learn about database migrations and enabling the advanced sear ## Setting up and enabling Elasticsearch ### Installing Elasticsearch -- Install JDK with `sudo apt-get install openjdk-8-jdk` +- Install JDK with `sudo apt-get install openjdk-21-jdk` - Install Elasticsearch - [From packages](https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html) - Enable the service: @@ -85,6 +85,7 @@ Continue below to learn about database migrations and enabling the advanced sear - `sudo systemctl start elasticsearch.service` - or [simply extracting the archives and running the files](https://www.elastic.co/guide/en/elasticsearch/reference/current/_installation.html), if you don't feel like permanently installing ES - Run `curl -XGET 'localhost:9200'` and make sure ES is running + - You may need to set `xpack.security.enabled: false` in your `elasticsearch.yml` file if curl output is empty - Install [Kibana](https://www.elastic.co/products/kibana) as a search debug frontend for ES (*optional*) ### Enabling 
MySQL Binlogging @@ -99,7 +100,10 @@ Continue below to learn about database migrations and enabling the advanced sear - Copy the example configuration (`es_sync_config.example.json`) as `es_sync_config.json` and adjust options in it to your liking (verify the connection options!) - Connect to mysql as root - Verify that the result of `SHOW VARIABLES LIKE 'binlog_format';` is `ROW` - - Execute `GRANT REPLICATION SLAVE ON *.* TO 'username'@'localhost';` to allow your configured user access to the binlog + - Execute `GRANT REPLICATION SLAVE ON *.* TO 'nyaauser'@'localhost';` to allow your configured user access to the binlog and one of the following: + - For MySQL: `GRANT REPLICATION CLIENT ON *.* TO 'nyaauser'@'localhost';` + - For MariaDB: `GRANT BINLOG MONITOR ON *.* TO 'nyaauser'@'localhost';` +- Run `./create_es.sh` to create the indices for the torrents: `nyaa` and `sukebei` ### Setting up ES - Run `./create_es.sh` to create the indices for the torrents: `nyaa` and `sukebei` @@ -116,8 +120,8 @@ However, take note that binglog is not necessary for simple ES testing and devel ### Setting up sync_es.py `sync_es.py` keeps the Elasticsearch indices updated by reading the binlog and pushing the changes to the ES indices. -- Make sure `es_sync_config.json` is configured with the user you grated the `REPLICATION` permissions -- Run `import_to_es.py` and copy the outputted JSON into the file specified by `save_loc` in your `es_sync_config.json` +- Make sure `es_sync_config.json` is configured with the user you granted the `REPLICATION` permissions +- Run `python import_to_es.py /path/to/file.json` and copy the outputted JSON into the file specified by `save_loc` in your `es_sync_config.json` file - Run `sync_es.py` as-is *or*, for actual deployment, set it up as a service and run it, preferably as the system/root - Make sure `sync_es.py` runs within the venv with the right dependencies! 
diff --git a/config.example.py b/config.example.py index 6c24285..48bde20 100644 --- a/config.example.py +++ b/config.example.py @@ -180,7 +180,7 @@ ES_MAX_SEARCH_RESULT = 1000 # ES index name generally (nyaa or sukebei) ES_INDEX_NAME = SITE_FLAVOR # ES hosts -ES_HOSTS = ['localhost:9200'] +ES_HOSTS = ['http://localhost:9200'] ################ ## Commenting ## diff --git a/es_sync_config.example.json b/es_sync_config.example.json index 658c101..2ccbd6e 100644 --- a/es_sync_config.example.json +++ b/es_sync_config.example.json @@ -2,8 +2,8 @@ "save_loc": "/tmp/pos.json", "mysql_host": "127.0.0.1", "mysql_port": 3306, - "mysql_user": "nyaa", - "mysql_password": "some_password", + "mysql_user": "nyaauser", + "mysql_password": "nyaapass", "database": "nyaav3", "internal_queue_depth": 10000, "es_chunk_size": 10000, diff --git a/import_to_es.py b/import_to_es.py index fbd25b2..6ce5326 100755 --- a/import_to_es.py +++ b/import_to_es.py @@ -13,12 +13,13 @@ import progressbar from elasticsearch import Elasticsearch from elasticsearch.client import IndicesClient from elasticsearch import helpers +from sqlalchemy import text from nyaa import create_app, models from nyaa.extensions import db app = create_app('config') -es = Elasticsearch(hosts=app.config['ES_HOSTS'], timeout=30) +es = Elasticsearch(hosts=app.config['ES_HOSTS'], request_timeout=30) ic = IndicesClient(es) def pad_bytes(in_bytes, size): @@ -98,14 +99,15 @@ FLAVORS = [ # Get binlog status from mysql with app.app_context(): - master_status = db.engine.execute('SHOW MASTER STATUS;').fetchone() + with db.engine.begin() as connection: + master_status = connection.execute(text('SHOW MASTER STATUS;')).fetchone() position_json = { 'log_file': master_status[0], 'log_pos': master_status[1] } - print('Save the following in the file configured in your ES sync config JSON:') + print('Save the following in the file configured in es_sync_config.json:') print(json.dumps(position_json)) for flavor, torrent_class in FLAVORS: diff 
--git a/sync_es.py b/sync_es.py index aa1adcb..090c792 100755 --- a/sync_es.py +++ b/sync_es.py @@ -68,9 +68,9 @@ stats = StatsClient('localhost', 8125, prefix="sync_es") SAVE_LOC = config.get('save_loc', "/tmp/pos.json") MYSQL_HOST = config.get('mysql_host', '127.0.0.1') MYSQL_PORT = config.get('mysql_port', 3306) -MYSQL_USER = config.get('mysql_user', 'root') -MYSQL_PW = config.get('mysql_password', 'dunnolol') -NT_DB = config.get('database', 'nyaav2') +MYSQL_USER = config.get('mysql_user', 'nyaauser') +MYSQL_PW = config.get('mysql_password', 'nyaapass') +NT_DB = config.get('database', 'nyaav3') INTERNAL_QUEUE_DEPTH = config.get('internal_queue_depth', 10000) ES_CHUNK_SIZE = config.get('es_chunk_size', 10000) # seconds since no events happening to flush to es. remember this also @@ -263,7 +263,7 @@ class EsPoster(ExitingThread): self.flush_interval = flush_interval def run_happy(self): - es = Elasticsearch(hosts=app.config['ES_HOSTS'], timeout=30) + es = Elasticsearch(hosts=app.config['ES_HOSTS'], request_timeout=30) last_save = time.time() since_last = 0