Fixed Elasticsearch

This commit is contained in:
sb745 2025-12-22 03:35:11 +02:00
parent 0c7733ca83
commit 4fe13c1df3
No known key found for this signature in database
GPG key ID: FCECC197D40D3DE0
5 changed files with 21 additions and 15 deletions

View file

@ -7,7 +7,7 @@ Running Nyaa on Windows may be possible, but it's currently unsupported.
### Major changes from NyaaV2 ### Major changes from NyaaV2
- Updated from Python 3.7 to Python 3.14 - Updated from Python 3.7 to Python 3.14
- Updated all dependencies to their latest versions - Updated all dependencies
- Modernized code patterns for Flask 3.0 and SQLAlchemy 2.0 - Modernized code patterns for Flask 3.0 and SQLAlchemy 2.0
- Replaced deprecated Flask-Script, orderedset and `flask.Markup` with Flask CLI, orderly-set and markupsafe - Replaced deprecated Flask-Script, orderedset and `flask.Markup` with Flask CLI, orderly-set and markupsafe
- Implemented mail error handling - Implemented mail error handling
@ -77,7 +77,7 @@ Continue below to learn about database migrations and enabling the advanced sear
## Setting up and enabling Elasticsearch ## Setting up and enabling Elasticsearch
### Installing Elasticsearch ### Installing Elasticsearch
- Install JDK with `sudo apt-get install openjdk-8-jdk` - Install JDK with `sudo apt-get install openjdk-21-jdk`
- Install Elasticsearch - Install Elasticsearch
- [From packages](https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html) - [From packages](https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html)
- Enable the service: - Enable the service:
@ -85,6 +85,7 @@ Continue below to learn about database migrations and enabling the advanced sear
- `sudo systemctl start elasticsearch.service` - `sudo systemctl start elasticsearch.service`
- or [simply extracting the archives and running the files](https://www.elastic.co/guide/en/elasticsearch/reference/current/_installation.html), if you don't feel like permanently installing ES - or [simply extracting the archives and running the files](https://www.elastic.co/guide/en/elasticsearch/reference/current/_installation.html), if you don't feel like permanently installing ES
- Run `curl -XGET 'localhost:9200'` and make sure ES is running - Run `curl -XGET 'localhost:9200'` and make sure ES is running
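- If the curl output is empty, you may need to set `xpack.security.enabled: false` in your `elasticsearch.yml` file (recent Elasticsearch versions enable security by default, so plain HTTP requests are rejected)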
- Install [Kibana](https://www.elastic.co/products/kibana) as a search debug frontend for ES (*optional*) - Install [Kibana](https://www.elastic.co/products/kibana) as a search debug frontend for ES (*optional*)
### Enabling MySQL Binlogging ### Enabling MySQL Binlogging
@ -99,7 +100,10 @@ Continue below to learn about database migrations and enabling the advanced sear
- Copy the example configuration (`es_sync_config.example.json`) as `es_sync_config.json` and adjust options in it to your liking (verify the connection options!) - Copy the example configuration (`es_sync_config.example.json`) as `es_sync_config.json` and adjust options in it to your liking (verify the connection options!)
- Connect to mysql as root - Connect to mysql as root
- Verify that the result of `SHOW VARIABLES LIKE 'binlog_format';` is `ROW` - Verify that the result of `SHOW VARIABLES LIKE 'binlog_format';` is `ROW`
- Execute `GRANT REPLICATION SLAVE ON *.* TO 'username'@'localhost';` to allow your configured user access to the binlog - Execute `GRANT REPLICATION SLAVE ON *.* TO 'nyaauser'@'localhost';` to allow your configured user access to the binlog, then execute one of the following:
- For MySQL: `GRANT REPLICATION CLIENT ON *.* TO 'nyaauser'@'localhost';`
- For MariaDB: `GRANT BINLOG MONITOR ON *.* TO 'nyaauser'@'localhost';`
- Run `./create_es.sh` to create the indices for the torrents: `nyaa` and `sukebei`
### Setting up ES ### Setting up ES
- Run `./create_es.sh` to create the indices for the torrents: `nyaa` and `sukebei` - Run `./create_es.sh` to create the indices for the torrents: `nyaa` and `sukebei`
@ -116,8 +120,8 @@ However, take note that binlog is not necessary for simple ES testing and devel
### Setting up sync_es.py ### Setting up sync_es.py
`sync_es.py` keeps the Elasticsearch indices updated by reading the binlog and pushing the changes to the ES indices. `sync_es.py` keeps the Elasticsearch indices updated by reading the binlog and pushing the changes to the ES indices.
- Make sure `es_sync_config.json` is configured with the user you grated the `REPLICATION` permissions - Make sure `es_sync_config.json` is configured with the user you granted the `REPLICATION` permissions
- Run `import_to_es.py` and copy the outputted JSON into the file specified by `save_loc` in your `es_sync_config.json` - Run `python import_to_es.py /path/to/file.json` and copy the outputted JSON into the file specified by `save_loc` in your `es_sync_config.json` file
- Run `sync_es.py` as-is *or*, for actual deployment, set it up as a service and run it, preferably as the system/root user - Run `sync_es.py` as-is *or*, for actual deployment, set it up as a service and run it, preferably as the system/root user
- Make sure `sync_es.py` runs within the venv with the right dependencies! - Make sure `sync_es.py` runs within the venv with the right dependencies!

View file

@ -180,7 +180,7 @@ ES_MAX_SEARCH_RESULT = 1000
# ES index name generally (nyaa or sukebei) # ES index name generally (nyaa or sukebei)
ES_INDEX_NAME = SITE_FLAVOR ES_INDEX_NAME = SITE_FLAVOR
# ES hosts # ES hosts
ES_HOSTS = ['localhost:9200'] ES_HOSTS = ['http://localhost:9200']
################ ################
## Commenting ## ## Commenting ##

View file

@ -2,8 +2,8 @@
"save_loc": "/tmp/pos.json", "save_loc": "/tmp/pos.json",
"mysql_host": "127.0.0.1", "mysql_host": "127.0.0.1",
"mysql_port": 3306, "mysql_port": 3306,
"mysql_user": "nyaa", "mysql_user": "nyaauser",
"mysql_password": "some_password", "mysql_password": "nyaapass",
"database": "nyaav3", "database": "nyaav3",
"internal_queue_depth": 10000, "internal_queue_depth": 10000,
"es_chunk_size": 10000, "es_chunk_size": 10000,

View file

@ -13,12 +13,13 @@ import progressbar
from elasticsearch import Elasticsearch from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient from elasticsearch.client import IndicesClient
from elasticsearch import helpers from elasticsearch import helpers
from sqlalchemy import text
from nyaa import create_app, models from nyaa import create_app, models
from nyaa.extensions import db from nyaa.extensions import db
app = create_app('config') app = create_app('config')
es = Elasticsearch(hosts=app.config['ES_HOSTS'], timeout=30) es = Elasticsearch(hosts=app.config['ES_HOSTS'], request_timeout=30)
ic = IndicesClient(es) ic = IndicesClient(es)
def pad_bytes(in_bytes, size): def pad_bytes(in_bytes, size):
@ -98,14 +99,15 @@ FLAVORS = [
# Get binlog status from mysql # Get binlog status from mysql
with app.app_context(): with app.app_context():
master_status = db.engine.execute('SHOW MASTER STATUS;').fetchone() with db.engine.begin() as connection:
master_status = connection.execute(text('SHOW MASTER STATUS;')).fetchone()
position_json = { position_json = {
'log_file': master_status[0], 'log_file': master_status[0],
'log_pos': master_status[1] 'log_pos': master_status[1]
} }
print('Save the following in the file configured in your ES sync config JSON:') print('Save the following in the file configured in es_sync_config.json:')
print(json.dumps(position_json)) print(json.dumps(position_json))
for flavor, torrent_class in FLAVORS: for flavor, torrent_class in FLAVORS:

View file

@ -68,9 +68,9 @@ stats = StatsClient('localhost', 8125, prefix="sync_es")
SAVE_LOC = config.get('save_loc', "/tmp/pos.json") SAVE_LOC = config.get('save_loc', "/tmp/pos.json")
MYSQL_HOST = config.get('mysql_host', '127.0.0.1') MYSQL_HOST = config.get('mysql_host', '127.0.0.1')
MYSQL_PORT = config.get('mysql_port', 3306) MYSQL_PORT = config.get('mysql_port', 3306)
MYSQL_USER = config.get('mysql_user', 'root') MYSQL_USER = config.get('mysql_user', 'nyaauser')
MYSQL_PW = config.get('mysql_password', 'dunnolol') MYSQL_PW = config.get('mysql_password', 'nyaapass')
NT_DB = config.get('database', 'nyaav2') NT_DB = config.get('database', 'nyaav3')
INTERNAL_QUEUE_DEPTH = config.get('internal_queue_depth', 10000) INTERNAL_QUEUE_DEPTH = config.get('internal_queue_depth', 10000)
ES_CHUNK_SIZE = config.get('es_chunk_size', 10000) ES_CHUNK_SIZE = config.get('es_chunk_size', 10000)
# seconds since no events happening to flush to es. remember this also # seconds since no events happening to flush to es. remember this also
@ -263,7 +263,7 @@ class EsPoster(ExitingThread):
self.flush_interval = flush_interval self.flush_interval = flush_interval
def run_happy(self): def run_happy(self):
es = Elasticsearch(hosts=app.config['ES_HOSTS'], timeout=30) es = Elasticsearch(hosts=app.config['ES_HOSTS'], request_timeout=30)
last_save = time.time() last_save = time.time()
since_last = 0 since_last = 0