This site is developed to XHTML and CSS2 W3C standards. If you see this paragraph, your browser does not support those standards and you need to upgrade. Visit WaSP for a variety of options.
Paste #661

Posted by: url.py
Posted on: 2026-03-15 20:58:46
Age: 93 days ago
Views: 79
# -*- coding: utf-8 -*-
"""
Name:        UrlTitle.py
Description: Shows the page title whenever a link is sent to the chat
Version:     0.4
Author:      adaptation for 2025–2026
"""

# Plugin metadata (name, version, description) declared as module-level dunders.
__module_name__        = "UrlTitle"
__module_version__     = "0.4"
__module_description__ = "Показывает <title> страницы при отправке ссылок"

import html
import http.client
import re
import socket
import urllib.error
import urllib.parse
import urllib.request

import hexchat

# Settings — feel free to adjust
TIMEOUT       = 6          # seconds allowed for one page download attempt
MAX_TITLE_LEN = 120        # overly long titles are truncated to this length
USER_AGENT    = "HexChat-UrlTitle/0.4 (compatible; +https://example.com)"

# Deliberately simple URL regex (http(s)://...): everything up to the next
# whitespace, angle bracket or quote character is treated as part of the URL.
URL_RE = re.compile(r'(https?://[^\s<>"\']+)', re.IGNORECASE)

# Blacklist of targets that are usually not worth fetching (slow to answer,
# no useful <title>, or not HTML at all). Entries starting with "." are file
# extensions and are compared with the end of the URL path; every other entry
# is a domain and is compared with the URL host (subdomains included).
BLACKLIST = {
    "youtu.be", "youtube.com", "twitter.com", "x.com",
    "t.me", "discord.com", "discordapp.com", "github.com", ".png", ".jpg",
    ".jpeg", ".gif", ".webp", ".pdf", ".zip", ".exe",
}


def is_probably_html(url):
    """Return True when *url* is worth fetching for a ``<title>``.

    The URL is parsed first so that blacklist entries are matched against the
    right component: domains against the host, extensions against the path.
    A plain substring test on the whole URL would, for example, reject
    ``linux.com`` because it contains ``x.com``.

    Args:
        url: Absolute URL string.

    Returns:
        False if the host is a blacklisted domain (or a subdomain of one), if
        the path ends with a blacklisted extension, or if the URL cannot be
        parsed at all; True otherwise.
    """
    try:
        parts = urllib.parse.urlsplit(url)
        # A trailing dot ("x.com.") denotes the same host; normalise it away.
        host = (parts.hostname or "").lower().rstrip(".")
    except ValueError:
        # Malformed URL (e.g. unbalanced IPv6 brackets): nothing to fetch.
        return False

    path = parts.path.lower()
    for bad in BLACKLIST:
        if bad.startswith("."):
            # Extension entry: query string and fragment are not part of
            # ``path``, so "/pic.png?size=1" is still recognised.
            if path.endswith(bad):
                return False
        elif host == bad or host.endswith("." + bad):
            return False
    return True

def get_title(url):
    """Fetch *url* and return the text of its ``<title>`` element.

    Only the first 16 KiB of the response body are read. The body is decoded
    with the charset announced in the ``Content-Type`` header, falling back
    to UTF-8 when none is given or the name is unknown to Python.

    Args:
        url: Absolute http(s) URL to download.

    Returns:
        The title with HTML entities decoded, whitespace runs collapsed to a
        single space and the length capped at ``MAX_TITLE_LEN`` characters
        (ending in "..." when truncated). ``None`` when the request fails,
        the status is not 200, the response is not ``text/html``, or no
        non-empty title is found.
    """
    try:
        req = urllib.request.Request(
            url,
            headers={'User-Agent': USER_AGENT}
        )
        with urllib.request.urlopen(req, timeout=TIMEOUT) as response:
            if response.getcode() != 200:
                return None

            content_type = response.headers.get('Content-Type', '').lower()
            if 'text/html' not in content_type:
                return None

            charset = response.headers.get_content_charset() or 'utf-8'
            raw = response.read(8192 * 2)
    except (OSError, http.client.HTTPException, ValueError):
        # OSError covers URLError/HTTPError, socket timeouts and connection
        # resets. HTTPException covers protocol-level failures such as
        # InvalidURL or IncompleteRead; ValueError covers URLs that urllib
        # refuses to process. None of them should escape into the caller.
        return None

    try:
        data = raw.decode(charset, errors='replace')
    except LookupError:
        # The server announced a charset name Python does not know.
        data = raw.decode('utf-8', errors='replace')

    m = re.search(r'<title[^>]*>(.*?)</title>', data, re.IGNORECASE | re.DOTALL)
    if not m:
        return None

    title = html.unescape(m.group(1).strip())
    title = ' '.join(title.split())  # collapse whitespace runs, incl. newlines
    if not title:
        return None
    if len(title) > MAX_TITLE_LEN:
        title = title[:MAX_TITLE_LEN-3] + "..."
    return title

def on_message(word, word_eol, event_name, attrs=None):
    """Print the page title for every link found in a chat message.

    Args:
        word: Event fields; ``word[0]`` is the nick and ``word[1]`` the
            message text.
        word_eol: Unused.
        event_name: Unused; the hooks in this file pass the event name here.
        attrs: Unused.

    Returns:
        Always ``hexchat.EAT_NONE``, so the message itself is never
        suppressed.
    """
    # NOTE(review): get_title() fetches synchronously, so this callback can
    # take up to TIMEOUT seconds per URL; presumably HexChat is stalled
    # meanwhile. Consider moving the fetch to a worker thread.
    if len(word) < 2:
        return hexchat.EAT_NONE
    text = word[1]

    attempted = set()

    for found in URL_RE.findall(text):
        # URL_RE stops only at whitespace/quotes/angle brackets, so sentence
        # punctuation right after a link ("see http://a.b/c.") gets captured.
        url = found.rstrip('.,;:!?')

        # Remember every URL that was tried, not only the ones that produced
        # a title, so a repeated failing link is not downloaded again.
        if url in attempted:
            continue
        attempted.add(url)

        # Skip links that are obviously not HTML pages
        if not is_probably_html(url):
            continue

        title = get_title(url)
        if title:
            # Printed via hexchat.prnt(), i.e. locally in the client window
            hexchat.prnt(f"→ Title: \00302{title}\003\t({url})")

    return hexchat.EAT_NONE

def unload_cb(userdata):
    """Print a notice that the plugin has been unloaded.

    Args:
        userdata: Unused.
    """
    farewell = f"→ {__module_name__} выгружен"
    hexchat.prnt(farewell)

# ────────────────────────────────────────────────
# Register print hooks for every message event type
# ────────────────────────────────────────────────

_HOOKED_EVENTS = (
    "Channel Message",
    "Your Message",           # messages sent by the user
    "Channel Msg Hilight",
    "Private Message",
    "Private Message to",
)

# The event name is also passed as the hook's userdata argument.
for event in _HOOKED_EVENTS:
    hexchat.hook_print(event, on_message, event, priority=hexchat.PRI_LOW)

hexchat.hook_unload(unload_cb)

# Load banner printed once, after all hooks are registered.
hexchat.prnt(f"→ Плагин {__module_name__} {__module_version__} загружен")
Download raw | Create new paste
Recent Posts

Paste #661