Меню
Главная
Случайная статья
Настройки
|
using System;
using System.Text;
using System.Linq;
using DotNetWikiBot;
using System.IO;
using System.Text.RegularExpressions;
using System.Collections.Generic;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System.Net;
namespace Wikipedia
{
static class Program
{
// Directory that receives every report file; assigned in Main.
static string outDir;
// Site connections keyed by language code; filled on demand by GetSite.
static Dictionary<string, Site> sites = new Dictionary<string, Site>();
const string login = "1botand1";
// The password is taken from the environment instead of being embedded in the source
// (the original declaration had no initializer at all). An unset variable yields an
// empty password. Declared before ruWiki on purpose: static field initializers run in
// textual order and ruWiki's initializer reads this value.
static readonly string password = Environment.GetEnvironmentVariable("WIKIBOT_PASSWORD") ?? "";
// Connection to Russian Wikipedia, created with the credentials above.
static Site ruWiki = new Site("https://ru.wikipedia.org", login, password);
// Working page list bound to ruWiki; shared by AllTemplates and OldTemplates.
static PageList pl = new PageList(ruWiki);
// Matches a whole template call ({{ ... }}, possibly spanning lines) whose name is one of
// Нп1-Нп5 / нп1-нп5, "Не переведено" (optionally followed by 1-5), Iw/iw, Ut3/Ut4,
// and which has at least one '|' after the name.
static Regex r = new Regex(@"{{\s*(?>[Нн]п[1-5]|[Нн]е[\s_]переведено[\s_]?[1-5]?|[Ii]w|[Uu]t[34])\s*\|.*?}}", RegexOptions.Compiled | RegexOptions.Singleline);
/// <summary>
/// Entry point. Silences the bot's console output, makes sure the "out" directory next to
/// the executing assembly exists, and sets the default edit comment on the Russian wiki
/// site. Neither AllTemplates nor OldTemplates is invoked from here.
/// </summary>
static void Main()
{
    Bot.EnableSilenceMode();

    // Reports are written to an "out" folder located beside the executable.
    string assemblyPath = System.Reflection.Assembly.GetExecutingAssembly().Location;
    string assemblyDir = Path.GetDirectoryName(assemblyPath);
    outDir = Path.Combine(assemblyDir, "out");

    bool reportDirMissing = !Directory.Exists(outDir);
    if (reportDirMissing)
    {
        Directory.CreateDirectory(outDir);
    }

    ruWiki.defaultEditComment = "Бот: исправление шаблонов не переведено";
}
/// <summary>
/// For every template name listed in templates.json (read from the directory of the
/// executing assembly), loads the pages transcluding "Шаблон:" + name and, for each
/// main-namespace page, classifies every template call matched by <c>r</c> into one of
/// three report files under <c>outDir</c>:
/// the foreign target is a disambiguation page; the foreign target (directly or after
/// resolving a redirect) has a Russian interwiki; or the foreign target does not exist.
/// Nothing is saved back to the wiki. Any exception raised while processing a page is
/// appended to exceptions.txt and processing continues with the next page.
/// </summary>
static void AllTemplates()
{
    string exeDir = Path.GetDirectoryName(System.Reflection.Assembly.GetExecutingAssembly().Location);
    var templates = (JArray)JsonConvert.DeserializeObject(File.ReadAllText(Path.Combine(exeDir, "templates.json")));
    foreach (string template in templates)
    {
        // Stopwatch is monotonic, unlike DateTime.Now which can jump with clock adjustments.
        var timer = System.Diagnostics.Stopwatch.StartNew();
        pl.FillFromTransclusionsOfPage("Шаблон:" + template);
        Bot.DisableSilenceMode();
        Console.OutputEncoding = Encoding.UTF8;
        Console.WriteLine($"Template {template} has been succesfully loaded, found " + pl.Count() + " transclusions. It took " + timer.Elapsed.TotalSeconds + " seconds");
        Bot.EnableSilenceMode();
        foreach (Page p in pl)
        {
            try
            {
                // Only articles (namespace 0) are examined.
                if (p.GetNamespace() != 0)
                    continue;
                p.LoadTextOnly();

                var disambigTargets = new List<string>();
                var alreadyTranslated = new List<string>();
                var missingTargets = new List<string>();

                foreach (Match m in r.Matches(p.text))
                {
                    NotTranslated nt = NotTranslated.GetFromWikiText(m.Value);
                    Page foreign = new Page(GetSite(nt.lang), nt.foreignName);
                    foreign.LoadTextOnly();

                    if (!foreign.Exists())
                    {
                        missingTargets.Add(m.Value);
                        // Replace the template with its display text. When no display text was
                        // given, fall back to the Russian name instead of deleting the text.
                        p.text = p.text.Replace(m.Value, nt.text == "" ? nt.russianName : nt.text);
                        continue;
                    }
                    if (foreign.IsDisambig())
                    {
                        disambigTargets.Add(m.Value);
                        continue;
                    }
                    if (HasUsableRussianInterWiki(foreign))
                    {
                        alreadyTranslated.Add(m.Value);
                    }
                    else if (foreign.IsRedirect())
                    {
                        // Retry on the redirect target.
                        foreign.ResolveRedirect();
                        if (HasUsableRussianInterWiki(foreign))
                            alreadyTranslated.Add(m.Value);
                    }
                }

                Bot.DisableSilenceMode();
                try
                {
                    AppendReport("Pages with templates referring to disambig pages.txt", p.title, disambigTargets,
                        p.title + " added in Pages with templates referring to disambig pages");
                    AppendReport("Foreignpages with russian IW but not in template.txt", p.title, alreadyTranslated,
                        p.title + " Foreignpages with russian IW but not in template");
                    AppendReport("Pages with templates referring to non-existent pages.txt", p.title, missingTargets,
                        p.title + " added in Pages with templates referring to non-existent pages");
                }
                finally
                {
                    // Restore silence mode even when writing a report fails, so the remaining
                    // pages are not processed with console output enabled.
                    Bot.EnableSilenceMode();
                }
            }
            catch (Exception e)
            {
                File.AppendAllText(Path.Combine(outDir, "exceptions.txt"), e.Message + " " + p.title + "\n");
            }
        }
        pl.Clear();
    }
}

// True when the foreign page has a Russian interwiki whose target is not reported as a
// disambiguation page.
// NOTE(review): the Russian page is constructed but never loaded before IsDisambig() is
// called (same as the original code) - confirm IsDisambig works on an unloaded page.
static bool HasUsableRussianInterWiki(Page foreign)
{
    string rusIW;
    return foreign.GetRussianInterWiki(out rusIW) && !new Page(ruWiki, rusIW).IsDisambig();
}

// Appends one "title: { entries }" block to the given report file in outDir and prints
// consoleMessage; does nothing when there are no entries.
static void AppendReport(string fileName, string pageTitle, List<string> entries, string consoleMessage)
{
    if (entries.Count == 0)
        return;
    var block = new StringBuilder(pageTitle + ": {\n");
    foreach (string entry in entries)
        block.Append(" ").Append(entry).Append("\n");
    block.Append("}\n");
    File.AppendAllText(Path.Combine(outDir, fileName), block.ToString());
    Console.WriteLine(consoleMessage);
}
/// <summary>
/// Looks for a Russian interlanguage link among the page's interlanguage links.
/// </summary>
/// <param name="page">Page whose interlanguage links are queried.</param>
/// <param name="name">Title of the Russian page (everything after the "ru:" prefix), or an
/// empty string when no Russian link exists.</param>
/// <returns>True when a link starting with "ru:" was found.</returns>
static bool GetRussianInterWiki(this Page page, out string name)
{
    const string russianPrefix = "ru:";
    // A plain sequential scan: the previous parallel loop wrote shared locals without
    // synchronisation and gained nothing on such a short list.
    foreach (string link in page.GetInterLanguageLinks())
    {
        // The language code is a prefix; a substring test would also hit titles that merely
        // contain "ru:" (e.g. "en:Guru: ...").
        if (link.StartsWith(russianPrefix, StringComparison.Ordinal))
        {
            // Keep the whole remainder so that titles containing ':' are not truncated.
            name = link.Substring(russianPrefix.Length);
            return true;
        }
    }
    name = "";
    return false;
}
/// <summary>
/// Walks the main-namespace pages of the category
/// "Категория:Википедия:Статьи с неактуальным шаблоном Не переведено". For each matched
/// template call whose Russian target page exists, verifies that the Russian page and the
/// foreign page are interwikis of each other and, if so, replaces the template in the
/// in-memory page text with an ordinary wiki link. Calls that fail the check are appended
/// to a report file in <c>outDir</c>. Saving is left commented out, so nothing is written
/// to the wiki.
/// </summary>
static void OldTemplates()
{
    pl.FillAllFromCategory("Категория:Википедия:Статьи с неактуальным шаблоном Не переведено");
    foreach (Page p in pl)
    {
        if (p.GetNamespace() != 0)
            continue;
        p.LoadTextOnly();
        foreach (Match m in r.Matches(p.text))
        {
            NotTranslated nt = NotTranslated.GetFromWikiText(m.Value);
            Page rp = new Page(ruWiki, nt.russianName);
            rp.LoadTextOnly();
            if (!rp.Exists())
                continue;

            // 1) Does the Russian page link to the foreign page?
            bool linked = AreInterWikis(rp, nt.lang, nt.foreignName);

            // 2) If not, retry on the redirect target (only when there is a redirect to follow).
            if (!linked && rp.IsRedirect())
            {
                rp.ResolveRedirect();
                linked = AreInterWikis(rp, nt.lang, nt.foreignName);
            }

            // 3) Last resort: does the foreign page link back to the Russian target page?
            if (!linked)
            {
                Page fp = new Page(GetSite(nt.lang), nt.foreignName);
                // Load before IsRedirect(), mirroring AllTemplates.
                // NOTE(review): presumably IsRedirect() inspects the loaded text - confirm.
                fp.LoadTextOnly();
                if (fp.IsRedirect())
                    fp.ResolveRedirect();
                string ruTitle;
                // Compare with the Russian target page, not with p (the article that merely
                // contains the template).
                linked = fp.GetRussianInterWiki(out ruTitle) && ruTitle == rp.title;
            }

            if (!linked)
            {
                File.AppendAllText(Path.Combine(outDir, "Pages with old templates but referring pages are not IWs.txt"), $"{p.title} в шаблоне {m.Value}\n");
                continue;
            }

            // Without display text link straight to the Russian page (the old code emitted "[[]]").
            string wikiLink = nt.text == ""
                ? $"[[{nt.russianName}]]"
                : $"[[{nt.russianName}|{nt.text}]]";
            p.text = p.text.Replace(m.Value, wikiLink);
        }
        //p.Save();
    }
}
/// <summary>
/// Checks whether the Russian page has an interlanguage link to the given foreign page.
/// </summary>
/// <param name="russianPage">Page whose interlanguage links are inspected.</param>
/// <param name="lang">Language code of the foreign wiki.</param>
/// <param name="foreignPage">Title of the foreign page.</param>
/// <returns>True when one of the links equals "lang:foreignPage", ignoring case.</returns>
static bool AreInterWikis(Page russianPage, string lang, string foreignPage)
{
    string expected = lang + ":" + foreignPage;
    // The earlier ForEach(v => v = v.ToLower()) only reassigned the lambda parameter, so a
    // lower-cased needle was compared with unmodified links and mixed-case titles never matched.
    return russianPage.GetInterLanguageLinks()
        .Any(link => string.Equals(link, expected, StringComparison.OrdinalIgnoreCase));
}
/// <summary>
/// Returns the Site for a language code, creating it on first use and keeping it in
/// <c>sites</c> for later calls. The code "be-x-old" is looked up as "be-tarask".
/// </summary>
/// <param name="lang">Language code used as the wikipedia.org sub-domain.</param>
/// <returns>The cached or newly created Site.</returns>
static Site GetSite(string lang)
{
    string key = lang == "be-x-old" ? "be-tarask" : lang;

    Site cached;
    if (sites.TryGetValue(key, out cached))
    {
        return cached;
    }

    cached = new Site($"https://{key}.wikipedia.org", login, password);
    sites.Add(key, cached);
    return cached;
}
}
/// <summary>
/// Parsed form of a "не переведено"-style template call:
/// {{template|russianName|text|lang|foreignName}}.
/// </summary>
struct NotTranslated
{
    // Singleline so that HTML comments spanning several lines are removed as well.
    private static readonly Regex CommentPattern = new Regex(@"<!--.*?-->", RegexOptions.Compiled | RegexOptions.Singleline);
    // An explicitly numbered parameter: the number must open the parameter ("3=en"), so a
    // "digit=" sequence in the middle of a value is not mistaken for a parameter name.
    private static readonly Regex NumberedParameterPattern = new Regex(@"^\s*([0-9]{1,3})\s*=", RegexOptions.Compiled);

    public string template;
    public string russianName;
    public string text;
    public string lang;
    public string foreignName;

    /// <summary>Creates a value from already separated parameters.</summary>
    /// <param name="t">Template name.</param>
    /// <param name="rn">Title of the Russian page.</param>
    /// <param name="text">Display text; may be empty.</param>
    /// <param name="lang">Language code; an empty string is stored as "en".</param>
    /// <param name="fn">Title of the foreign page; an empty string means "same as <paramref name="rn"/>".</param>
    public NotTranslated(string t, string rn, string text = "", string lang = "en", string fn = "")
    {
        template = t;
        russianName = rn;
        this.text = text;
        this.lang = lang == "" ? "en" : lang;
        foreignName = (fn == "") ? rn : fn;
    }

    /// <summary>
    /// Parses the wiki text of one template call. Braces and HTML comments are stripped, the
    /// rest is split on '|'. Unnumbered parameters are numbered in order of appearance,
    /// "N=value" parameters are stored under N, and a later value for the same index replaces
    /// an earlier one. All values are trimmed.
    /// </summary>
    /// <param name="wikiText">Full template call, including the surrounding braces.</param>
    /// <returns>The parsed template; missing parameters get the constructor defaults.</returns>
    /// <exception cref="ArgumentException">Fewer than two parts (name plus one parameter) were found.</exception>
    public static NotTranslated GetFromWikiText(string wikiText)
    {
        wikiText = wikiText.Replace("{{", "").Replace("}}", "");
        wikiText = CommentPattern.Replace(wikiText, "");
        string[] parsRaw = wikiText.Split('|');

        // int rather than byte: a byte counter would wrap around on very long inputs.
        var pars = new Dictionary<int, string>();
        int numberedCount = 0;
        for (int i = 0; i < parsRaw.Length; i++)
        {
            string par = parsRaw[i];
            // Explicitly numbered parameters do not advance the positional counter.
            int index = i - numberedCount;
            Match m = NumberedParameterPattern.Match(par);
            // i == 0 is the template name, never a parameter.
            if (i > 0 && m.Success)
            {
                index = int.Parse(m.Groups[1].Value, System.Globalization.CultureInfo.InvariantCulture);
                par = par.Substring(m.Length);
                numberedCount++;
                // Index 0 is reserved for the template name; ignore a stray "0=".
                if (index == 0)
                    continue;
            }
            // Indexer instead of Add: a repeated index overrides instead of throwing.
            pars[index] = par.Trim();
        }

        if (pars.Count < 2)
            throw new ArgumentException("Ошибочный шаблон");

        // Absent parameters are passed as "", which the constructor treats like its defaults.
        return new NotTranslated(
            ValueOrEmpty(pars, 0),
            ValueOrEmpty(pars, 1),
            ValueOrEmpty(pars, 2),
            ValueOrEmpty(pars, 3),
            ValueOrEmpty(pars, 4));
    }

    // Returns the parameter stored under index, or "" when it was not supplied.
    private static string ValueOrEmpty(Dictionary<int, string> pars, int index)
    {
        string value;
        return pars.TryGetValue(index, out value) ? value : "";
    }

    /// <summary>
    /// Renders the value back as a template call. The text and foreign-name slots are left
    /// empty when they equal the Russian name.
    /// </summary>
    public string ToWikiText()
    {
        return $"{{{{{template}|{russianName}|{(text == russianName ? "" : text)}|{lang}|{(foreignName == russianName ? "" : foreignName)}}}}}";
    }

    /// <summary>Same as <see cref="ToWikiText"/>.</summary>
    public override string ToString()
    {
        return ToWikiText();
    }
}
|
|