# This file was a tutorial paste that mixed shell commands into the Python
# source, which made it a SyntaxError. The shell steps belong in a terminal:
#
#   pip install scrapy whoosh
#   scrapy runspider myspider.py -o results.json
#
# The Python parts below are reorganized into a valid module: a Scrapy spider
# that crawls and yields page records, plus helpers that build and query a
# Whoosh full-text index.

import os

import scrapy
from whoosh.fields import Schema, TEXT
from whoosh.index import create_in
from whoosh.qparser import QueryParser


class MySpider(scrapy.Spider):
    """Crawl the configured sites and yield url/title/content records."""

    name = "myspider"
    start_urls = ["https://example.com"]  # Sites to index

    def parse(self, response):
        # css("body::text").get() returns None when the <body> has no direct
        # text node; fall back to "" so the slice below cannot raise TypeError.
        body_text = response.css("body::text").get() or ""
        yield {
            "url": response.url,
            "title": response.css("title::text").get(),
            "content": body_text[:500],  # first 500 characters only
        }


def build_index(index_dir="indexdir"):
    """Create a Whoosh index in *index_dir*, add a sample document, return it.

    ``create_in`` requires the directory to already exist, so it is created
    first (idempotently).
    """
    schema = Schema(url=TEXT(stored=True), title=TEXT(stored=True), content=TEXT)
    os.makedirs(index_dir, exist_ok=True)
    ix = create_in(index_dir, schema)
    writer = ix.writer()
    writer.add_document(
        url="https://example.com",
        title="Пример",
        content="Текст страницы...",
    )
    writer.commit()
    return ix


def search_index(ix, query_text="поисковый запрос"):
    """Search the ``content`` field of *ix* and print title/url of each hit."""
    with ix.searcher() as searcher:
        query = QueryParser("content", ix.schema).parse(query_text)
        results = searcher.search(query)
        for hit in results:
            print(hit["title"], hit["url"])


if __name__ == "__main__":
    index = build_index()
    search_index(index)