User:YFdyh000/DuplicateReferences.js
注意:保存之后,你必须清除浏览器缓存才能看到做出的更改。Google Chrome、Firefox、Microsoft Edge及Safari:按住⇧ Shift键并单击工具栏的“刷新”按钮。参阅Help:绕过浏览器缓存以获取更多帮助。
// a fork of https://en.wikipedia.org/wiki/User:Polygnotus/DuplicateReferences.js
// Testpage: https://zh.wikipedia.org/wiki/User:YFdyh000/DuplicateReferencesTest
// <nowiki>
// Wait for the ResourceLoader modules listed here (mediawiki.util and the HanAssist gadget)
// before running the script body.
mw.loader.using( ['mediawiki.util', 'ext.gadget.HanAssist'] ).then( ( require ) => {
// batchConv is applied to the message table that is handed to mw.messages.set further down.
const { batchConv } = require( 'ext.gadget.HanAssist' );
// Everything else runs once the DOM is ready.
$(document).ready(function () {
// Script-wide switches.
const checkAlways = true; // Set to false to compute duplicates only while the panel is expanded (lower CPU usage). The panel remembers its collapsed state.
const DEBUG = false; // When true, debug() prints its arguments to the console.
const WARN = true; // When true, warn() prints its arguments to the console.
/**
 * Prints a diagnostic message to the console, prefixed with the script name.
 * Does nothing unless the DEBUG switch is on.
 *
 * @param {...*} args Values forwarded to console.log after the prefix.
 */
function debug(...args) {
    if (!DEBUG) {
        return;
    }
    console.log('[DuplicateReferences]', ...args);
}
/**
 * Prints a warning-level message to the console, prefixed with the script name.
 * Does nothing unless the WARN switch is on. Output goes through console.log.
 *
 * @param {...*} args Values forwarded to console.log after the prefix.
 */
function warn(...args) {
    if (!WARN) {
        return;
    }
    console.log('[DuplicateReferences]', ...args);
}
if (
mw.config.get('wgAction') !== 'view' ||
mw.config.get('wgDiffNewId') ||
mw.config.get('wgDiffOldId') ||
(mw.config.get('wgNamespaceNumber') !== 0 && (mw.config.get('wgPageName') !== 'User:Polygnotus/DuplicateReferencesTest') && mw.config.get('wgPageName') !== 'User:YFdyh000/DuplicateReferencesTest')
) {
debug("Not the correct page or action, script terminated");
return;
}
// Register the script's UI strings. Each entry carries a Simplified (hans) and a
// Traditional (hant) Chinese variant; the table is passed through HanAssist's batchConv
// before being stored, and the strings are read back elsewhere with mw.msg().
mw.messages.set( batchConv( {
    'dupeRef-toolName': { hans: '文献去重', hant: '文獻去重' },
    'dupeRef-addFail': { hans: '添加模板失败,详见网页控制台(F12)中的信息。', hant: '添加模板失敗,詳見網頁控制台(F12)中的信息。' },
    'dupeRef-working': { hans: '处理中...', hant: '處理中...' },
    'dupeRef-done': { hans: '完成', hant: '完成' },
    'dupeRef-error': { hans: '出错', hant: '出錯' },
    'dupeRef-toolTitle': { hans: '重复的参考文献 ', hant: '重複的參考文獻 ' },
    'dupeRef-toolTitleWithNumber': { hans: '重复的参考文献 ($1/$2) ', hant: '重複的參考文獻 ($1/$2) ' },
    'dupeRef-addAction': { hans: '添加', hant: '添加' },
    'dupeRef-feedbackFalse': { hans: '反馈误报', hant: '反饋誤報' },
    // Both variants end with a space so the first footnote number is not glued to the colon.
    'dupeRef-atRef': { hans: ' 于脚注: ', hant: ' 於腳註: ' },
    'dupeRef-show': { hans: '显示', hant: '顯示' },
    'dupeRef-hide': { hans: '隐藏', hant: '隱藏' },
} ) );
// Record which page the script is running on (printed only when DEBUG is enabled).
debug("Page title:", document.title);
debug("URL:", window.location.href);
/**
 * Counts, per references section, how many elements match `selector`.
 *
 * Each matched element is traced up to its enclosing `ol.references` (or the
 * `.reflist` / `.notelist` wrapper directly around it). The section is identified by a
 * CSS selector: `#<id>` of the first child of the `.mw-heading` element immediately
 * preceding the list, or `.mw-parser-output>ol.references` when the container itself
 * carries the `mw-parser-output` class.
 *
 * Lists for which no selector can be derived are left out, because an empty selector
 * cannot be passed to querySelector. Heading ids are escaped with CSS.escape so ids that
 * start with a digit or contain punctuation still form a valid selector.
 *
 * @param {string} selector CSS selector for the elements to count.
 * @returns {Array<{parent: string, count: number}>} Sections sorted by descending count.
 */
function countUniqueReferencesParents(selector) {
    const parentCount = {};
    document.querySelectorAll(selector).forEach((element) => {
        let parent = element.closest('ol.references');
        if (!parent) {
            return;
        }
        // Step over one wrapper layer so the heading lookup starts from the outer container.
        const wrapper = parent.parentElement;
        if (wrapper && (wrapper.classList.contains('reflist') || wrapper.classList.contains('notelist'))) {
            parent = wrapper;
        }
        let uniqueIdentifier = '';
        const previousSibling = parent.previousElementSibling;
        if (previousSibling && previousSibling.classList.contains('mw-heading')) {
            const hElement = previousSibling.firstElementChild;
            if (hElement && hElement.id) {
                uniqueIdentifier = `#${CSS.escape(hElement.id)}`;
            }
        }
        if (parent.classList.contains('mw-parser-output')) {
            uniqueIdentifier = `.mw-parser-output>ol.references`;
        }
        if (!uniqueIdentifier) {
            // No heading to anchor on: nothing a caller could look up later.
            return;
        }
        parentCount[uniqueIdentifier] = (parentCount[uniqueIdentifier] || 0) + 1;
    });
    return Object.entries(parentCount)
        .map(([parent, count]) => ({ parent, count }))
        .sort((a, b) => b.count - a.count);
}
const result = countUniqueReferencesParents(('ol.references .reference-text a'));
if (result.length === 0) {
warn("没有找到参考文献章节");
return;
}
hEl = document.querySelector(result[0].parent);
const containerDiv = hEl.parentElement;
const referencesList = containerDiv.nextElementSibling.querySelector('ol.references');
if (!referencesList) {
warn("references element not found");
return;
}
// Add the script's CSS to the page: highlight classes for duplicate citations,
// the toggle link layout, and the results table (hidden on narrow screens).
const dupeRefStyleSheet = Object.assign(document.createElement('style'), {
    textContent: `
li:target { border: 1px dotted red; padding: 2px; background-color: #ffcccc !important;}
.duplicate-citation-highlight { background-color: #e1eeff; }
.duplicate-citation-hover { background-color: #cce0ff; border: 1px dotted blue; }
.duplicate-citation-clicked { border: 1px dotted red; padding: 2px; background-color: #ffe6e6; }
.mw-collapsible-toggle { font-weight: normal; float: right; }
.duplicate-references-table { width: 100%; }
@media only screen and (max-width: 768px) {
.duplicate-references-table { display: none; }
}
.duplicate-references-table .row {overflow-wrap: anywhere;}
`
});
document.head.appendChild(dupeRefStyleSheet);
/**
 * Inserts a {{Duplicated citations}} maintenance template into the current page's
 * wikitext, listing every duplicate URL found, then reloads the page.
 *
 * The template is placed after any leading lines that start with one of the known
 * lead templates (short description, hatnotes, ...); scanning stops at the first
 * non-empty line that is neither a template nor a `__MAGIC__` word.
 *
 * Progress is shown by replacing the content of `linkElement`; on failure the error is
 * logged to the console and a notification is shown.
 *
 * @param {HTMLElement} linkElement The link that triggered the action; used as status display.
 */
function addDuplicateCitationsTemplate(linkElement) {
    debug("Adding duplicate citations template");
    showLoading(linkElement);
    const pageTitle = mw.config.get('wgPageName');
    const duplicateInfo = getDuplicateInfo();
    const isoDate = new Date().toISOString().split('T')[0];
    const dateParam = `|date=${isoDate}`;
    // Lead templates the new template must be placed after (compared case-insensitively).
    const templatesToCheck = [
        '{{short description',
        '{{DISPLAYTITLE',
        '{{Lowercase title',
        '{{Italic title',
        '{{about',
        '{{redirect',
        '{{Distinguish',
        '{{for'
    ].map((template) => template.toLowerCase());
    let api;
    // mw.Api lives in the mediawiki.api module, which the script does not load up front.
    mw.loader.using('mediawiki.api').then(function () {
        api = new mw.Api();
        return api.get({
            action: 'query',
            prop: 'revisions',
            titles: pageTitle,
            rvprop: 'ids|content',
            rvslots: 'main',
            formatversion: 2
        });
    }).then(function (data) {
        const page = data.query.pages[0];
        const revision = page && page.revisions && page.revisions[0];
        if (!revision) {
            throw new Error(`No revision content returned for ${pageTitle}`);
        }
        const content = revision.slots.main.content;
        // Find the line index at which to insert the new template.
        let insertPosition = 0;
        const lines = content.split('\n');
        for (let i = 0; i < lines.length; i++) {
            const line = lines[i].trim().toLowerCase();
            if (templatesToCheck.some((template) => line.startsWith(template))) {
                insertPosition = i + 1;
            } else if (line && !line.startsWith('{{') && !line.startsWith('__')) {
                break;
            }
        }
        // Build the reason parameter: one bullet per duplicated URL with its footnote numbers.
        let reason = `[[User:YFdyh000/DuplicateReferences.js|${mw.msg('dupeRef-toolName')}]]检测到:<br>\n`;
        duplicateInfo.forEach((info) => {
            reason += `* ${info.url} (脚注: ${info.refs.map(r => r.number).join(', ')})<br>\n`;
        });
        lines.splice(insertPosition, 0, `{{Duplicated citations|reason=${reason}${dateParam}}}`);
        const newContent = lines.join('\n');
        const summary = `[[User:YFdyh000/DuplicateReferences.js|${mw.msg('dupeRef-toolName')}]] +{{Duplicated citations|reason=${reason}${dateParam}}}`;
        return api.postWithToken('csrf', {
            action: 'edit',
            title: pageTitle,
            text: newContent,
            summary: summary,
            // Lets the server detect an edit made after the text above was fetched,
            // instead of silently overwriting it.
            baserevid: revision.revid
        });
    }).then(function () {
        showSuccess(linkElement);
        setTimeout(function () {
            location.reload();
        }, 100); // Reload after 0.1 second
    }).catch(function (error) {
        console.error('Error:', error);
        showError(linkElement);
        mw.notify(`${mw.msg('dupeRef-addFail')}`, {type: 'error'});
    });
}
/**
 * Replaces the element's content with the bracketed "working" status label.
 *
 * @param {HTMLElement} element Element whose innerHTML is overwritten.
 */
function showLoading(element) {
    const label = mw.msg('dupeRef-working');
    element.innerHTML = `<sup><small>[ ${label} ]</small></sup>`;
}
/**
 * Replaces the element's content with the bracketed "done" status label.
 *
 * @param {HTMLElement} element Element whose innerHTML is overwritten.
 */
function showSuccess(element) {
    const label = mw.msg('dupeRef-done');
    element.innerHTML = `<sup><small>[ ${label} ]</small></sup>`;
}
/**
 * Replaces the element's content with the bracketed "error" status label.
 *
 * @param {HTMLElement} element Element whose innerHTML is overwritten.
 */
function showError(element) {
    const label = mw.msg('dupeRef-error');
    element.innerHTML = `<sup><small>[ ${label} ]</small></sup>`;
}
/**
 * Collects the text of an element's subtree, leaving out any child element whose
 * computed style is `display: none` or `visibility: hidden`.
 *
 * Each text node is trimmed; pieces are separated by single spaces and the final
 * result is trimmed. Nodes that are neither text nor element are ignored.
 *
 * @param {Node} element Root whose childNodes are walked recursively.
 * @returns {string} The concatenated visible text.
 */
function getVisibleText(element) {
    const pieces = [];
    for (const child of element.childNodes) {
        if (child.nodeType === Node.TEXT_NODE) {
            pieces.push(child.textContent.trim());
            continue;
        }
        if (child.nodeType !== Node.ELEMENT_NODE) {
            continue;
        }
        const { display, visibility } = window.getComputedStyle(child);
        if (display === 'none' || visibility === 'hidden') {
            continue;
        }
        pieces.push(getVisibleText(child));
    }
    return pieces.join(' ').trim();
}
/**
 * Computes the Levenshtein edit distance between two strings: the minimum number of
 * single-character insertions, deletions and substitutions turning one into the other.
 * Characters are compared with charAt, i.e. per UTF-16 code unit.
 *
 * @param {string} a First text.
 * @param {string} b Second text.
 * @returns {number} The edit distance; the other string's length when one is empty.
 */
function calculateLevenshteinDistance(a, b) {
    debug("Comparing:");
    debug("Text 1:", a);
    debug("Text 2:", b);
    if (a.length === 0) {
        return b.length;
    }
    if (b.length === 0) {
        return a.length;
    }
    // previousRow[j] holds the distance between the prefix of b handled so far and the
    // first j characters of a. Only the previous row is needed to build the next one.
    let previousRow = Array.from({ length: a.length + 1 }, (_, j) => j);
    for (let i = 1; i <= b.length; i++) {
        const currentRow = [i];
        const bChar = b.charAt(i - 1);
        for (let j = 1; j <= a.length; j++) {
            if (bChar === a.charAt(j - 1)) {
                currentRow[j] = previousRow[j - 1];
            } else {
                const substitution = previousRow[j - 1];
                const insertion = currentRow[j - 1];
                const deletion = previousRow[j];
                currentRow[j] = Math.min(substitution, insertion, deletion) + 1;
            }
        }
        previousRow = currentRow;
    }
    const distance = previousRow[a.length];
    debug("Levenshtein distance:", distance);
    return distance;
}
/**
 * Converts an edit distance into a rounded similarity percentage string.
 *
 * @param {number} distance Edit distance between the two texts.
 * @param {number} maxLength Length of the longer text.
 * @returns {string} Similarity rounded to an integer and suffixed with '%'.
 *   Two empty texts (maxLength 0) are reported as '100%' rather than 'NaN%'.
 */
function calculateSimilarityPercentage(distance, maxLength) {
    // Guard the division: with maxLength 0 both texts are empty, hence identical.
    const similarity = maxLength > 0
        ? ((maxLength - distance) / maxLength) * 100
        : 100;
    debug("Similarity percentage:", similarity.toFixed(2) + "%");
    return Math.round(similarity) + '%';
}
/**
 * Scans the <li> children of `referencesList` and groups them by the href of the first
 * acceptable external link in each.
 *
 * A link is acceptable when its text is not an archive label, its URL does not contain
 * "wikipedia.org" or "_(identifier)", and it does not start with one of the excluded
 * URL prefixes. For every URL shared by more than one reference, the pairwise text
 * similarity is stored on the earlier reference as `similarity[<later ref id>]`.
 *
 * @returns {Array<{url: string, refs: Array<{id: string, number: number, text: string, similarity: (Object|undefined)}>}>}
 *   One entry per URL used by two or more references.
 */
function getDuplicateInfo() {
    debug("Getting duplicate info");
    // Link texts that mark an archive copy rather than the cited source itself.
    const archiveLinkTexts = ["Archived", "存档", "存檔"];
    const excludedUrlPrefixes = [
        "https://www.bbc.co.uk/news/live/", // BBC live articles get frequent updates
        "https://books.google.com/", // may be 2 different pages of the same book
        "https://archive.org/details/isbn_"
    ];
    const urlMap = new Map();
    const referenceItems = Array.from(referencesList.children);
    debug("Number of reference items:", referenceItems.length);
    referenceItems.forEach((item, index) => {
        if (item.tagName.toLowerCase() !== 'li') {
            return;
        }
        const refId = item.id;
        const refNumber = index + 1;
        debug(`Processing reference item ${refNumber} (${refId})`);
        // Get the visible text of the entire reference item
        const refText = getVisibleText(item);
        debug(` Reference text: ${refText}`);
        // Find the first valid link in the reference
        let validLink = null;
        for (const link of item.querySelectorAll('a')) {
            const url = link.href;
            if (!url.includes('http')) {
                debug(` Skipping reference ${refNumber} - URL does not contain 'http'`);
                // Leaves the forEach callback, so the whole reference item is skipped,
                // not just this link.
                return;
            }
            const linkText = link.textContent.trim();
            if (
                // Must not equal ANY archive label (the earlier `||` chain was always true).
                !archiveLinkTexts.includes(linkText) &&
                !url.includes("wikipedia.org") &&
                !url.includes("_(identifier)") && // Templates like ISBN and ISSN and OCLC and S2CID contain (identifier)
                !excludedUrlPrefixes.some((prefix) => url.startsWith(prefix))
            ) {
                validLink = link;
                debug(` Valid link found: ${url}`);
                break;
            }
        }
        if (!validLink) {
            debug(` No valid link found in this item`);
            return;
        }
        const url = validLink.href;
        const entry = {id: refId, number: refNumber, text: refText};
        if (urlMap.has(url)) {
            urlMap.get(url).push(entry);
            debug(` Duplicate found for URL: ${url}`);
        } else {
            urlMap.set(url, [entry]);
            debug(` New URL added to map: ${url}`);
        }
    });
    const duplicates = [];
    urlMap.forEach((refs, url) => {
        if (refs.length < 2) {
            return;
        }
        // Calculate the similarity for each pair of refs sharing this URL
        for (let i = 0; i < refs.length - 1; i++) {
            for (let j = i + 1; j < refs.length; j++) {
                debug(`Comparing references ${refs[i].number} and ${refs[j].number}:`);
                const distance = calculateLevenshteinDistance(refs[i].text, refs[j].text);
                const maxLength = Math.max(refs[i].text.length, refs[j].text.length);
                const similarity = calculateSimilarityPercentage(distance, maxLength);
                refs[i].similarity = refs[i].similarity || {};
                refs[i].similarity[refs[j].id] = similarity;
            }
        }
        duplicates.push({url, refs});
    });
    debug("Number of duplicate sets found:", duplicates.length);
    debug("Duplicate sets:", duplicates);
    return duplicates;
}
/**
 * Builds the results table: a header row (title, show/hide toggle and, unless the page
 * already shows a {{Duplicated citations}} box, an "add template" link) followed by one
 * row per duplicated URL listing the footnotes that use it.
 *
 * Footnote links highlight the matching citations on hover and mark them on click.
 * Every footnote after the first shows its text similarity to the first one; the value
 * stored on the first reference by getDuplicateInfo is reused when present, so the
 * comparison is not run a second time.
 *
 * @param {Array<{url: string, refs: Array<{id: string, number: number, text: string, similarity: (Object|undefined)}>}>} duplicateInfo
 *   Duplicate sets to list; an empty array yields a header-only table.
 * @returns {HTMLTableElement} The table, not yet attached to the document.
 */
function createCollapsibleTable(duplicateInfo) {
    // Runs `action` on the citation element of every reference in the set that exists.
    const forEachCitation = (refs, action) => {
        refs.forEach((r) => {
            const citationElement = document.getElementById(r.id);
            if (citationElement) {
                action(citationElement, r);
            }
        });
    };
    // Similarity between the first reference of a set and another one.
    const similarityToOriginal = (originalRef, ref) => {
        const cached = ref.id && originalRef.similarity && originalRef.similarity[ref.id];
        if (cached) {
            return cached;
        }
        return calculateSimilarityPercentage(
            calculateLevenshteinDistance(originalRef.text, ref.text),
            Math.max(originalRef.text.length, ref.text.length)
        );
    };

    const table = document.createElement('table');
    table.className = 'wikitable mw-collapsible duplicate-references-table';
    table.setAttribute('role', 'presentation');
    const tbody = document.createElement('tbody');
    table.appendChild(tbody);

    const headerRow = document.createElement('tr');
    headerRow.classList.add('tool-title');
    const headerCell = document.createElement('td');
    headerCell.innerHTML = `<strong>${mw.msg('dupeRef-toolTitle')}</strong>`;
    const toggleSpan = document.createElement('span');
    toggleSpan.className = 'mw-collapsible-toggle';
    toggleSpan.innerHTML = `[<a href="#" class="mw-collapsible-text">${mw.msg('dupeRef-hide')}</a>]`;
    headerCell.appendChild(toggleSpan);
    // Offer the "add template" link only when the page does not already show the template box.
    if (!document.querySelector('table.box-Duplicated_citations')) {
        const addTemplateLink = document.createElement('a');
        addTemplateLink.textContent = `${mw.msg('dupeRef-addAction')}{{duplicated citations}}`;
        addTemplateLink.href = '#';
        addTemplateLink.addEventListener('click', (e) => {
            e.preventDefault();
            addDuplicateCitationsTemplate(addTemplateLink);
        });
        headerCell.appendChild(addTemplateLink);
    }
    headerRow.appendChild(headerCell);
    tbody.appendChild(headerRow);

    duplicateInfo.forEach(({url, refs}) => {
        const row = document.createElement('tr');
        row.classList.add('row');
        const cell = document.createElement('td');

        // Icon linking to the script's talk page for reporting false positives.
        const reportIcon = document.createElement('a');
        reportIcon.href = `https://zh.wikipedia.org/wiki/User talk:YFdyh000/DuplicateReferences.js`;
        reportIcon.innerHTML = `<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/e/ef/Cross_CSS_Red.svg/15px-Cross_CSS_Red.svg.png" width="15" height="15" alt="${mw.msg('dupeRef-feedbackFalse')}" title="${mw.msg('dupeRef-feedbackFalse')}" />`;
        reportIcon.style.marginRight = '5px';
        cell.appendChild(reportIcon);

        const urlLink = document.createElement('a');
        urlLink.href = url;
        urlLink.textContent = url;
        urlLink.target = "_blank";
        urlLink.rel = "noopener noreferrer";
        cell.appendChild(urlLink);
        cell.appendChild(document.createTextNode(mw.msg('dupeRef-atRef')));

        const originalRef = refs[0];
        refs.forEach((ref, index) => {
            const link = document.createElement('a');
            link.href = `#${ref.id}`;
            link.textContent = ref.number;
            cell.appendChild(link);
            // Add similarity information for every reference after the first
            if (index > 0) {
                const similarityInfo = document.createElement('span');
                similarityInfo.textContent = ` (${similarityToOriginal(originalRef, ref)})`;
                cell.appendChild(similarityInfo);
            }
            link.addEventListener('mouseover', () => {
                forEachCitation(refs, (citationElement, r) => {
                    citationElement.classList.add(
                        r.id === ref.id ? 'duplicate-citation-hover' : 'duplicate-citation-highlight'
                    );
                });
            });
            link.addEventListener('mouseout', () => {
                forEachCitation(refs, (citationElement) => {
                    citationElement.classList.remove('duplicate-citation-hover');
                    citationElement.classList.remove('duplicate-citation-highlight');
                });
            });
            link.addEventListener('click', () => {
                // Clear the marks left by a previous click before marking this set.
                document.querySelectorAll('.duplicate-citation-clicked').forEach((el) => {
                    el.classList.remove('duplicate-citation-clicked');
                });
                forEachCitation(refs, (citationElement) => {
                    citationElement.classList.add('duplicate-citation-clicked');
                });
            });
            if (index < refs.length - 1) {
                cell.appendChild(document.createTextNode(', '));
            }
        });
        row.appendChild(cell);
        tbody.appendChild(row);
    });
    return table;
}
/**
 * Builds the results table and inserts it after `containerDiv`.
 *
 * Does nothing when a table that already has result rows is on the page. With
 * `noChecking` set, only a header-only table is inserted and no duplicates are computed;
 * otherwise any existing table is removed, duplicates are computed, and a table is
 * inserted only if at least one duplicate set was found.
 *
 * The show/hide state is stored in localStorage under 'duplicateReferencesTableState'.
 * Clicking the toggle flips that state and calls this function again with
 * noChecking = false.
 *
 * @param {boolean} [noChecking=false] Insert the header-only table without scanning.
 */
function checkDuplicateReferenceLinks(noChecking = false) {
    if (document.querySelector('.duplicate-references-table .row') !== null) {
        debug("Stop recreating element.");
        return;
    }

    // Wires the show/hide toggle of `table` and applies the remembered state.
    function addingCollapsible(table) {
        const stateKey = 'duplicateReferencesTableState';
        const toggleLink = table.querySelector('.mw-collapsible-toggle a');
        const bodyRows = $(table).find('tr:not(:first-child)');

        const applyState = (isCollapsed) => {
            if (isCollapsed) {
                bodyRows.hide();
            } else {
                bodyRows.show();
            }
            toggleLink.textContent = mw.msg(isCollapsed ? 'dupeRef-show' : 'dupeRef-hide');
            localStorage.setItem(stateKey, isCollapsed);
        };

        toggleLink.addEventListener('click', (e) => {
            e.preventDefault();
            // A header-only table (no body rows) counts as collapsed.
            const collapsedNow = bodyRows.is(':hidden') || bodyRows.length === 0;
            applyState(!collapsedNow);
            checkDuplicateReferenceLinks(false);
        });

        // Initialize state from localStorage
        applyState(localStorage.getItem(stateKey) === 'true');
    }

    if (noChecking) {
        debug("Creating collapsible table only.");
        const placeholderTable = createCollapsibleTable([]);
        containerDiv.after(placeholderTable);
        addingCollapsible(placeholderTable);
        return;
    }

    document.querySelectorAll('.duplicate-references-table').forEach((stale) => {
        stale.remove();
    });

    debug("Checking for duplicate reference links");
    const duplicateInfo = getDuplicateInfo();
    if (duplicateInfo.length === 0) {
        warn("No duplicates found");
        return;
    }

    debug("Duplicates found, creating collapsible table");
    const resultsTable = createCollapsibleTable(duplicateInfo);
    containerDiv.after(resultsTable);
    const referenceCount = Array.from(referencesList.children).length;
    resultsTable.querySelector('.tool-title>td>strong').innerText =
        mw.msg('dupeRef-toolTitleWithNumber', duplicateInfo.length, referenceCount);
    addingCollapsible(resultsTable);
}
// Start-up: if the panel was left collapsed and checkAlways is off, insert only the
// header-only table; otherwise scan for duplicates right away.
const dupeRefWasCollapsed = localStorage.getItem('duplicateReferencesTableState') === 'true';
checkDuplicateReferenceLinks(dupeRefWasCollapsed && !checkAlways);
debug("Script execution completed");
});
});
// </nowiki>