Files
luvul_crawl/bot/logger.js
T
tetsuya-kitayama b627224308 init
2026-05-18 10:19:19 +09:00

221 lines
6.2 KiB
JavaScript

const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');
/**
 * Formats a date as `<year>_<MM>_<DD>` from its local-time calendar fields.
 *
 * @param {Date} [date] - Date to format; defaults to the current time.
 * @returns {string} Year, two-digit month and two-digit day joined by underscores.
 */
function getDateString(date = new Date()) {
  // Prefix a zero and keep the last two characters to get a two-digit field.
  const lastTwo = (value) => `0${value}`.slice(-2);
  const fields = [
    date.getFullYear(),
    lastTwo(date.getMonth() + 1), // getMonth() is zero-based
    lastTwo(date.getDate()),
  ];
  return fields.join('_');
}
/**
 * Creates the log directory (including missing parents) unless the path
 * already exists.
 *
 * @param {string} baseDir - Directory that should hold the log files.
 * @returns {void}
 */
function ensureLogsDir(baseDir) {
  if (!fs.existsSync(baseDir)) {
    fs.mkdirSync(baseDir, { recursive: true });
  }
}
/**
 * Guesses the year for a month/day pair that carries no year.
 *
 * The pair is first placed in the year of `now`; if that date would lie more
 * than 31 days after `now`, the previous year is returned instead.
 *
 * @param {number} month - Month number, 1-12.
 * @param {number} day - Day of the month.
 * @param {Date} [now] - Reference instant; defaults to the current time.
 * @returns {number} The inferred year.
 */
function inferYear(month, day, now = new Date()) {
  const maxFutureMs = 31 * 24 * 60 * 60 * 1000;
  const currentYear = now.getFullYear();
  const sameYearDate = new Date(currentYear, month - 1, day);
  const isTooFarAhead = sameYearDate.getTime() - now.getTime() > maxFutureMs;
  return isTooFarAhead ? currentYear - 1 : currentYear;
}
/**
 * Extracts the trailing `(M/D H:MM:SS)` or `(YYYY/M/D H:MM:SS)` timestamp
 * from a chat message.
 *
 * @param {string} message - Chat message text.
 * @param {Date} [now] - Reference instant passed to `inferYear` when the
 *   timestamp has no year; defaults to the current time.
 * @returns {{year: number, month: number, day: number, hour: number,
 *   minute: number, second: number} | null} Numeric timestamp fields, or
 *   `null` when the message does not end with a timestamp.
 */
function parseMessageDateParts(message, now = new Date()) {
  // The timestamp must be the last thing in the message (trailing whitespace allowed).
  const timestampPattern = /\((?:(\d{4})\/)?(\d{1,2})\/(\d{1,2})\s+(\d{1,2}):(\d{2}):(\d{2})\)\s*$/;
  const found = message.match(timestampPattern);
  if (!found) {
    return null;
  }
  const [, yearText, monthText, dayText, hourText, minuteText, secondText] = found;
  const month = Number(monthText);
  const day = Number(dayText);
  // The year group is optional; fall back to inference when it is absent.
  const year = yearText ? Number(yearText) : inferYear(month, day, now);
  return {
    year,
    month,
    day,
    hour: Number(hourText),
    minute: Number(minuteText),
    second: Number(secondText),
  };
}
/**
 * Returns the date string (see `getDateString`) for the day a message belongs to.
 *
 * @param {string} message - Chat message text.
 * @param {Date} [now] - Reference instant; also used as the date when the
 *   message carries no timestamp. Defaults to the current time.
 * @returns {string} Date string built from the message timestamp, or from
 *   `now` when no timestamp could be parsed.
 */
function getMessageDateString(message, now = new Date()) {
  const parts = parseMessageDateParts(message, now);
  const messageDay = parts ? new Date(parts.year, parts.month - 1, parts.day) : now;
  return getDateString(messageDay);
}
/**
 * Converts the timestamp at the end of a message into epoch milliseconds,
 * interpreting the fields as local time.
 *
 * @param {string} message - Chat message text.
 * @param {Date} [now] - Reference instant forwarded to the timestamp parser;
 *   defaults to the current time.
 * @returns {number | null} Milliseconds since the epoch, or `null` when the
 *   message has no parsable timestamp.
 */
function parseMessageTimestamp(message, now = new Date()) {
  const parts = parseMessageDateParts(message, now);
  if (!parts) {
    return null;
  }
  const { year, month, day, hour, minute, second } = parts;
  const stamp = new Date(year, month - 1, day, hour, minute, second);
  return stamp.getTime();
}
/**
 * Builds the path of the log file for one room and one day.
 *
 * @param {string} baseDir - Directory that holds the log files.
 * @param {string} rid - Room id.
 * @param {string} dateString - Date component of the file name.
 * @returns {string} `<baseDir>/log_<rid>_<dateString>.txt`.
 */
function getLogFilePath(baseDir, rid, dateString) {
  const fileName = `log_${rid}_${dateString}.txt`;
  return path.join(baseDir, fileName);
}
/**
 * Reads a log file and returns its non-empty lines.
 *
 * @param {string} filePath - Path of the log file.
 * @returns {string[]} Lines of the file in file order (LF or CRLF separated,
 *   empty lines dropped); an empty array when the file does not exist.
 */
function loadMessages(filePath) {
  const messages = [];
  if (fs.existsSync(filePath)) {
    const text = fs.readFileSync(filePath, 'utf8');
    for (const line of text.split(/\r?\n/)) {
      if (line.length > 0) {
        messages.push(line);
      }
    }
  }
  return messages;
}
/**
 * Returns the messages ordered by their trailing timestamp.
 *
 * Messages without a parsable timestamp are placed after all timestamped
 * ones. Ties (equal timestamps, or two messages without one) keep their
 * original relative order. The input is not modified.
 *
 * @param {Iterable<string>} messages - Messages to order.
 * @returns {string[]} A new, sorted array.
 */
function sortMessages(messages) {
  const entries = [...messages].map((message, index) => ({
    message,
    index,
    timestamp: parseMessageTimestamp(message),
  }));

  const compareEntries = (left, right) => {
    const leftMissing = left.timestamp === null;
    const rightMissing = right.timestamp === null;
    if (leftMissing !== rightMissing) {
      // Exactly one side lacks a timestamp: that side goes last.
      return leftMissing ? 1 : -1;
    }
    if (!leftMissing && left.timestamp !== right.timestamp) {
      return left.timestamp - right.timestamp;
    }
    // Same timestamp, or neither has one: fall back to input order.
    return left.index - right.index;
  };

  entries.sort(compareEntries);
  return entries.map(({ message }) => message);
}
// In-memory cache of per-room, per-day log state, keyed by
// `<baseDir>::<rid>::<dateString>`.
const roomStates = new Map();

/**
 * Returns the cached state for one room/day log file, loading it from disk on
 * first use.
 *
 * @param {string} baseDir - Directory that holds the log files.
 * @param {string} rid - Room id.
 * @param {string} dateString - Date component of the log file name.
 * @returns {{filePath: string, messages: Set<string>}} The log file path and
 *   the set of messages known for it.
 */
function getRoomState(baseDir, rid, dateString) {
  const cacheKey = `${baseDir}::${rid}::${dateString}`;
  if (roomStates.has(cacheKey)) {
    return roomStates.get(cacheKey);
  }
  // First access: seed the set with whatever the log file already contains.
  const filePath = getLogFilePath(baseDir, rid, dateString);
  const state = {
    filePath,
    messages: new Set(loadMessages(filePath)),
  };
  roomStates.set(cacheKey, state);
  return state;
}
/**
 * Rewrites a log file with the given messages in sorted order, one per line.
 *
 * @param {string} filePath - Path of the log file to overwrite.
 * @param {Iterable<string>} messages - Messages to write.
 * @returns {void}
 */
function persistMessages(filePath, messages) {
  const lines = sortMessages(messages);
  let content = '';
  if (lines.length > 0) {
    // Every line, including the last, is newline-terminated.
    content = `${lines.join('\n')}\n`;
  }
  fs.writeFileSync(filePath, content, 'utf8');
}
/**
 * Records a chat message in the log file for its room and day, skipping
 * messages that are already known for that file.
 *
 * @param {string} rid - Room id.
 * @param {string} message - Chat message text.
 * @param {string} [baseDir] - Directory for the log files; defaults to the
 *   `logs` directory next to this file.
 * @returns {boolean} `true` when the message was new and the file was
 *   rewritten, `false` when it was a duplicate.
 */
function recordMessage(rid, message, baseDir = path.join(__dirname, 'logs')) {
  ensureLogsDir(baseDir);
  const state = getRoomState(baseDir, rid, getMessageDateString(message));
  const isNew = !state.messages.has(message);
  if (isNew) {
    state.messages.add(message);
    persistMessages(state.filePath, state.messages);
  }
  return isNew;
}
/**
 * Launches headless Chromium, opens every configured chat room and forwards
 * each message found in the chat log area to `recordMessage`.
 *
 * Messages already on the page are reported once at startup; later additions
 * are picked up by a MutationObserver running inside the page. The returned
 * promise never resolves on success, so the crawler keeps running. If setup
 * fails, the browser is closed and the error is rethrown.
 *
 * @returns {Promise<void>} Rejects when launching, navigating or installing
 *   the observer fails.
 */
async function main() {
  // Launch the browser.
  const browser = await puppeteer.launch({
    headless: true,
    executablePath: '/usr/bin/chromium-browser',
  });
  try {
    const rids = [
      '360109',
    ];
    ensureLogsDir(path.join(__dirname, 'logs'));
    for (const rid of rids) {
      const url = 'https://chat.luvul.net/ChatRoom?room_id=' + rid;
      const page = await browser.newPage();
      // Open the chat site.
      await page.goto(url);
      // Selector of the chat container (replace with the appropriate selector).
      const chatContainerSelector = '#chatlogarea';
      // Wait until the chat container has loaded.
      await page.waitForSelector(chatContainerSelector);
      console.log('チャットの監視を開始します');
      // Bridge from the page to Node: every reported message is logged and stored.
      await page.exposeFunction('onNewMessage', (message) => {
        console.log(message);
        try {
          recordMessage(rid, message);
        } catch (err) {
          // The page does not await this call, so a thrown error would vanish;
          // report it here instead.
          console.error(`Failed to record message for room ${rid}:`, err);
        }
      });
      // Everything inside this callback runs in the page, not in Node.
      await page.evaluate((selector) => {
        const chatContainer = document.querySelector(selector);
        if (!chatContainer) {
          return;
        }
        const previousMessages = new Set();
        // Set up the MutationObserver.
        const observer = new MutationObserver((mutationsList) => {
          for (const mutation of mutationsList) {
            if (mutation.type !== 'childList') {
              continue;
            }
            mutation.addedNodes.forEach((node) => {
              if (node.nodeType !== Node.ELEMENT_NODE) {
                return;
              }
              const messageText = node.textContent.trim();
              if (!previousMessages.has(messageText)) {
                previousMessages.add(messageText);
                window.onNewMessage(messageText); // report the new message
              }
            });
          }
        });
        // Process the messages that are already on the page, in reverse DOM
        // order (presumably the page lists the newest message first — confirm).
        const messages = Array.from(chatContainer.childNodes, (node) => node.textContent.trim()).reverse();
        for (const message of messages) {
          // Only timestamped entries are reported; the year prefix is optional,
          // matching what parseMessageDateParts accepts.
          if (message.search(/\((?:\d{4}\/)?\d+\/\d+\s+\d+:\d+:\d+\)/) !== -1) {
            window.onNewMessage(message);
          }
        }
        // Start observing.
        observer.observe(chatContainer, { childList: true, subtree: true });
      }, chatContainerSelector);
    }
    // Stay pending forever so main() never completes while the crawler runs.
    await new Promise(() => {});
  } catch (err) {
    // Do not leave a Chromium process behind when setup fails.
    try {
      await browser.close();
    } catch (closeErr) {
      console.error('Failed to close the browser:', closeErr);
    }
    throw err;
  }
}
// Start the crawler only when this file is run directly, not when it is
// required as a module.
if (require.main === module) {
  main().catch((err) => {
    // Report the failure and exit non-zero instead of leaving the rejection unhandled.
    console.error(err);
    process.exit(1);
  });
}
// Helpers exposed to other modules; requiring this file does not start the
// crawler because main() only runs when the file is executed directly.
module.exports = {
getDateString,
getLogFilePath,
getMessageDateString,
parseMessageTimestamp,
recordMessage,
sortMessages,
};