Console.WriteLine("\t-t\tвыполнить распознавание t-текстов");
Console.WriteLine("\t-help\tвызвать справку по команде");
}
else
Console.WriteLine("Программа успешно выполнена!");
}
}
}
Файл TextNormalizer.cs
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
namespace JSM_VS
{
/// <summary>
/// Runs the external <c>mystem.exe</c> program over every file of a source
/// directory, asking it to write the result into a destination directory.
/// </summary>
class TextNormalizer
{
    /// <summary>
    /// Managed mirror of the Win32 PROCESS_INFORMATION structure filled in by CreateProcess.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    private struct PROCESS_INFORMATION
    {
        public IntPtr hProcess;
        public IntPtr hThread;
        public uint dwProcessId;
        public uint dwThreadId;
    }

    /// <summary>
    /// Managed mirror of the Win32 STARTUPINFO structure passed to CreateProcess.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    private struct STARTUPINFO
    {
        public uint cb;
        public string lpReserved;
        public string lpDesktop;
        public string lpTitle;
        public uint dwX;
        public uint dwY;
        public uint dwXSize;
        public uint dwYSize;
        public uint dwXCountChars;
        public uint dwYCountChars;
        public uint dwFillAttribute;
        public uint dwFlags;
        public short wShowWindow;
        public short cbReserved2;
        public IntPtr lpReserved2;
        public IntPtr hStdInput;
        public IntPtr hStdOutput;
        public IntPtr hStdError;
    }

    // SetLastError lets Marshal.GetLastWin32Error() report why a launch failed.
    [DllImport("kernel32.dll", SetLastError = true)]
    static extern bool CreateProcess(string lpApplicationName, string lpCommandLine, IntPtr lpProcessAttributes, IntPtr lpThreadAttributes,
        bool bInheritHandles, uint dwCreationFlags, IntPtr lpEnvironment,
        string lpCurrentDirectory, ref STARTUPINFO lpStartupInfo, out PROCESS_INFORMATION lpProcessInformation);

    // Needed to release the process/thread handles returned by CreateProcess.
    [DllImport("kernel32.dll", SetLastError = true)]
    static extern bool CloseHandle(IntPtr hObject);

    /// <summary>
    /// Launches mystem once for every file found directly in <paramref name="source"/>.
    /// The output file is named "norm_" + the input file name and is placed in
    /// <paramref name="destination"/>.
    /// </summary>
    /// <param name="source">Directory whose files are passed to mystem as input.</param>
    /// <param name="destination">Directory that receives the "norm_*" output files.</param>
    /// <remarks>
    /// A failed launch is reported on the standard error stream and the remaining
    /// files are still processed. The method does not wait for the started
    /// processes to finish (same as before).
    /// NOTE(review): callers that read the output right away may need a wait - confirm.
    /// </remarks>
    public void Normalize(string source, string destination)
    {
        // mystem options:
        //   n - line-by-line mode; every word is printed on its own line
        //   l - do not print the original word forms, only lemmas and grammemes
        //   i - print grammatical information
        const string options = " -nli ";

        DirectoryInfo sourceDirectory = new DirectoryInfo(source);
        foreach (FileInfo file in sourceDirectory.GetFiles())
        {
            string fileInPath = Path.Combine(source, file.Name);
            string fileOutPath = Path.Combine(destination, "norm_" + file.Name);

            // Quote both paths so that directory or file names containing spaces
            // reach mystem as single arguments.
            string commandLine = "mystem.exe \"" + fileInPath + "\"" + options + "\"" + fileOutPath + "\"";

            STARTUPINFO si = new STARTUPINFO();
            // The Win32 contract requires cb to hold the size of the structure.
            si.cb = (uint)Marshal.SizeOf(typeof(STARTUPINFO));
            PROCESS_INFORMATION pi;

            bool started = CreateProcess("mystem.exe", commandLine,
                IntPtr.Zero, IntPtr.Zero, false, 0, IntPtr.Zero, null, ref si, out pi);
            if (!started)
            {
                // Read the error code immediately, before any other call can overwrite it.
                int errorCode = Marshal.GetLastWin32Error();
                Console.Error.WriteLine("Не удалось запустить mystem.exe для файла " + fileInPath
                    + " (код ошибки Win32: " + errorCode + ")");
                continue;
            }

            // Closing the handles does not terminate the child process; it only
            // stops this process from leaking two handles per input file.
            CloseHandle(pi.hThread);
            CloseHandle(pi.hProcess);
        }
    }
}
}
Файл CrossValidation.cs
using System;
using System. Collections. Generic;
using System. Linq;
using System. Text;
namespace JSM_VS
{
/// <summary>
/// Runs n-fold cross-validation of <c>JsmProcessor</c> over positive ("plus")
/// and negative ("minus") samples and averages the resulting quality metrics.
/// </summary>
class CrossValidator
{
    /// <summary>
    /// Divides two counts, yielding 0 when the denominator is 0 so that an empty
    /// fold cannot inject NaN into the averaged metrics.
    /// </summary>
    private static double SafeRatio(int numerator, int denominator)
    {
        return denominator == 0 ? 0.0 : (double)numerator / denominator;
    }

    /// <summary>
    /// Computes precision, recall, accuracy and F1 from classifier verdicts.
    /// </summary>
    /// <param name="resForPlusTestCollect">Verdicts ('+', '-' or anything else for "undefined") for the positive test samples.</param>
    /// <param name="resForMinusTestCollect">Verdicts for the negative test samples.</param>
    /// <param name="koeff">Receives the computed metrics.</param>
    static void CalcKoeff(char[] resForPlusTestCollect, char[] resForMinusTestCollect, ref EffectMeasure koeff)
    {
        int TP = 0, FP = 0, TN = 0, FN = 0;

        // Positive samples: '+' is a hit, '-' is a miss, i.e. a false NEGATIVE.
        foreach (char verdict in resForPlusTestCollect)
        {
            if (verdict == '+')
                TP++;
            else if (verdict == '-')
                FN++;
        }

        // Negative samples: '-' is a correct rejection, '+' is a false POSITIVE.
        foreach (char verdict in resForMinusTestCollect)
        {
            if (verdict == '-')
                TN++;
            else if (verdict == '+')
                FP++;
        }

        // Verdicts other than '+' / '-' are not counted anywhere, so they do not
        // enter the accuracy denominator either.
        koeff.precission = SafeRatio(TP, TP + FP);
        koeff.recall = SafeRatio(TP, TP + FN);
        koeff.accuracy = SafeRatio(TP + TN, TP + TN + FN + FP);

        double precisionPlusRecall = koeff.precission + koeff.recall;
        koeff.f1_measure = precisionPlusRecall > 0
            ? 2 * koeff.precission * koeff.recall / precisionPlusRecall
            : 0.0;
    }

    /// <summary>
    /// Splits <paramref name="samples"/> into a test slice
    /// [testStart, testStart + testCount) and a training set made of everything else,
    /// keeping the original order.
    /// </summary>
    private static void SplitFold(TextInfo[] samples, int testStart, int testCount,
        out TextInfo[] testCollection, out TextInfo[] trainingCollection)
    {
        testCollection = new TextInfo[testCount];
        Array.Copy(samples, testStart, testCollection, 0, testCount);

        trainingCollection = new TextInfo[samples.Length - testCount];
        // Part before the test slice.
        Array.Copy(samples, 0, trainingCollection, 0, testStart);
        // Part after the test slice (empty when the slice runs to the end).
        int tailStart = testStart + testCount;
        int tailCount = samples.Length - tailStart;
        if (tailCount > 0)
            Array.Copy(samples, tailStart, trainingCollection, testStart, tailCount);
    }

    /// <summary>
    /// Runs the cross-validation procedure and accumulates the averaged metrics.
    /// </summary>
    /// <param name="plusSamples">Positive samples.</param>
    /// <param name="minusSamples">Negative samples.</param>
    /// <param name="averAffectMeasure">
    /// Accumulator for the metrics: every fold's values are added to it and the sums
    /// are divided by <paramref name="n"/> after the last fold.
    /// </param>
    /// <param name="n">Number of folds; nothing happens when it is not positive.</param>
    /// <param name="imbalance">Passed through to <c>JsmProcessor.Classification</c>.</param>
    /// <param name="typeFuncResolution">Passed through to <c>JsmProcessor.Classification</c>.</param>
    public void ProcessTexts(TextInfo[] plusSamples, TextInfo[] minusSamples,
        ref EffectMeasure averAffectMeasure, int n, double imbalance, int typeFuncResolution)
    {
        if (n <= 0)
            return;

        // Block sizes do not depend on the fold, so compute them once.
        int sizePlusBlock = plusSamples.Length / n;
        int sizeMinusBlock = minusSamples.Length / n;

        for (int i = 0; i < n; i++)
        {
            int plusStart = i * sizePlusBlock;
            int minusStart = i * sizeMinusBlock;

            // A fold is "regular" while at least one more whole block follows it in
            // BOTH sample sets; otherwise the test slice takes everything up to the end.
            bool regularFold =
                plusSamples.Length - (plusStart + sizePlusBlock) >= sizePlusBlock &&
                minusSamples.Length - (minusStart + sizeMinusBlock) >= sizeMinusBlock;

            int plusTestCount = regularFold ? sizePlusBlock : plusSamples.Length - plusStart;
            int minusTestCount = regularFold ? sizeMinusBlock : minusSamples.Length - minusStart;

            // Test and training collections for this fold.
            TextInfo[] plusTestCollection;
            TextInfo[] plusTrainingCollection;
            TextInfo[] minusTestCollection;
            TextInfo[] minusTrainingCollection;
            SplitFold(plusSamples, plusStart, plusTestCount, out plusTestCollection, out plusTrainingCollection);
            SplitFold(minusSamples, minusStart, minusTestCount, out minusTestCollection, out minusTrainingCollection);

            // Hypothesis lists
            List<Hypothesis> plusHypotheses = new List<Hypothesis>();
            List<Hypothesis> minusHypotheses = new List<Hypothesis>();
            JsmProcessor jp = new JsmProcessor();

            // Induction
            jp.Induction(plusHypotheses, plusTrainingCollection);
            jp.Induction(minusHypotheses, minusTrainingCollection);

            // Analogy
            List<Hypothesis>[] coincidedHypForPlusSamples;
            List<Hypothesis>[] coincidedHypForMinusSamples;
            jp.Analogy(plusHypotheses, minusHypotheses, plusTestCollection, out coincidedHypForPlusSamples);
            jp.Analogy(plusHypotheses, minusHypotheses, minusTestCollection, out coincidedHypForMinusSamples);

            // Classifier verdict per test sample:
            //   '+' - the sample is positive
            //   '-' - the sample is negative
            //   'n' - the class of the sample is undefined
            char[] resForPlusTestCollection = new char[plusTestCollection.Length];
            char[] resForMinusTestCollection = new char[minusTestCollection.Length];
            jp.Classification(resForPlusTestCollection, coincidedHypForPlusSamples, imbalance, typeFuncResolution);
            jp.Classification(resForMinusTestCollection, coincidedHypForMinusSamples, imbalance, typeFuncResolution);

            EffectMeasure koeff = new EffectMeasure();
            CalcKoeff(resForPlusTestCollection, resForMinusTestCollection, ref koeff);

            averAffectMeasure.precission += koeff.precission;
            averAffectMeasure.recall += koeff.recall;
            averAffectMeasure.f1_measure += koeff.f1_measure;
            averAffectMeasure.accuracy += koeff.accuracy;
        }

        // Turn the accumulated sums into per-fold averages.
        averAffectMeasure.precission = averAffectMeasure.precission / n;
        averAffectMeasure.recall = averAffectMeasure.recall / n;
        averAffectMeasure.f1_measure = averAffectMeasure.f1_measure / n;
        averAffectMeasure.accuracy = averAffectMeasure.accuracy / n;
    }
}
}
Файл JsmProcessor.cs
|
Из-за большого объема этот материал размещен на нескольких страницах:
1 2 3 4 5 6 7 8 9 |


