Skip to content

update #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Analyser/Analyser.csproj
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<TargetFramework>netstandard2.1</TargetFramework>
<PackageId>Lucene.JIEba.Analyzer</PackageId>
<PackageVersion>1.0.0</PackageVersion>
<Authors>SilentCC</Authors>
Expand Down
18 changes: 18 additions & 0 deletions ConsoleApp1/ConsoleApp1.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Lucene.Net" Version="4.8.0-beta00016" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\jieba.NET\jieba.NET.csproj" />
</ItemGroup>

</Project>
107 changes: 107 additions & 0 deletions ConsoleApp1/LuceneHelper.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
using jieba.NET;
using JiebaNet.Segmenter;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConsoleApp1
{
internal class LuceneHelper
{
/// <summary>
/// Creates an <see cref="IndexWriter"/> over the "Index_Data" directory that analyzes
/// text with <see cref="JieBaAnalyzer"/> in <see cref="TokenizerMode.Search"/> mode.
/// </summary>
/// <returns>A new writer. The caller owns it and is responsible for disposing it.</returns>
public static IndexWriter GetIndexWriter()
{
    var dir = FSDirectory.Open("Index_Data");
    try
    {
        Analyzer analyzer = new JieBaAnalyzer(TokenizerMode.Search);
        try
        {
            var indexConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
            return new IndexWriter(dir, indexConfig);
        }
        catch
        {
            // The writer was never handed to the caller, so nobody else can release the analyzer.
            analyzer.Dispose();
            throw;
        }
    }
    catch
    {
        // Same reasoning for the directory: release it before propagating the failure
        // (e.g. when the index write lock could not be obtained).
        dir.Dispose();
        throw;
    }
}

/// <summary>
/// Indexes one page, replacing any document previously indexed under the same URL,
/// and commits the change.
/// </summary>
/// <param name="url">Page URL. Indexed as a <see cref="StringField"/> and used as the replacement key.</param>
/// <param name="title">Page title; analyzed text with boost 3.</param>
/// <param name="keywords">Page keywords; analyzed text with boost 2, stored in the "keyword" field.</param>
/// <param name="description">Page description; analyzed text with boost 1.</param>
public static void WriteDocument(string url, string title, string keywords, string description)
{
    using (var writer = GetIndexWriter())
    {
        var doc = new Document();
        doc.Add(new StringField("url", url, Field.Store.YES));
        doc.Add(new TextField("title", title, Field.Store.YES) { Boost = 3F });
        doc.Add(new TextField("keyword", keywords, Field.Store.YES) { Boost = 2F });
        doc.Add(new TextField("description", description, Field.Store.YES) { Boost = 1F });

        // UpdateDocument deletes every document matching the term and adds the new one
        // as a single atomic operation, instead of a separate delete followed by an add.
        writer.UpdateDocument(new Term("url", url), doc);

        // Commit flushes pending adds and deletes itself, so no explicit Flush is needed first.
        writer.Commit();
    }
}

/// <summary>
/// Splits a query string into distinct terms using <see cref="JieBaAnalyzer"/> in
/// <see cref="TokenizerMode.Search"/> mode.
/// </summary>
/// <param name="q">The raw query text.</param>
/// <returns>
/// The distinct terms in the order they were first produced by the analyzer;
/// an empty list when <paramref name="q"/> is null or empty.
/// </returns>
public static List<string> GetKeyWords(string q)
{
    var keywords = new List<string>();
    if (string.IsNullOrEmpty(q))
    {
        return keywords;
    }

    // Tracks terms already emitted so de-duplication is O(1) per token while
    // the list keeps first-seen order.
    var seen = new HashSet<string>();

    using (Analyzer analyzer = new JieBaAnalyzer(TokenizerMode.Search))
    using (var ts = analyzer.GetTokenStream(null, q))
    {
        ts.Reset();
        var ct = ts.GetAttribute<Lucene.Net.Analysis.TokenAttributes.ICharTermAttribute>();

        while (ts.IncrementToken())
        {
            // Only the first ct.Length chars of the shared buffer belong to the current term.
            string item = new string(ct.Buffer, 0, ct.Length);
            if (seen.Add(item))
            {
                keywords.Add(item);
            }
        }

        // The TokenStream workflow requires End() after the last IncrementToken() and before Dispose().
        ts.End();
    }

    return keywords;
}
/// <summary>
/// Searches the "Index_Data" index for documents whose title, keyword or description
/// contains any term of the analyzed query, and prints up to 200 hits to the console.
/// </summary>
/// <param name="q">The raw query text; it is tokenized with <see cref="GetKeyWords"/>.</param>
public static void Search(string q)
{
    // Both the directory and the reader hold index resources, so dispose them when done.
    using (var dir = FSDirectory.Open("Index_Data"))
    using (var reader = DirectoryReader.Open(dir))
    {
        var searcher = new IndexSearcher(reader);

        // Every (field, term) pair is an optional clause: a document matches if any one of them matches.
        var keyWordQuery = new BooleanQuery();
        foreach (var item in GetKeyWords(q))
        {
            keyWordQuery.Add(new TermQuery(new Term("title", item)), Occur.SHOULD);
            keyWordQuery.Add(new TermQuery(new Term("keyword", item)), Occur.SHOULD);
            keyWordQuery.Add(new TermQuery(new Term("description", item)), Occur.SHOULD);
        }

        var hits = searcher.Search(keyWordQuery, 200).ScoreDocs;

        foreach (var hit in hits)
        {
            var document = searcher.Doc(hit.Doc);
            Console.WriteLine("Url:{0}", document.Get("url"));
            Console.WriteLine("Title:{0}", document.Get("title"));
            Console.WriteLine("Keyword:{0}", document.Get("keyword"));
            Console.WriteLine("Description:{0}", document.Get("description"));
        }
    }
}
}
}
7 changes: 7 additions & 0 deletions ConsoleApp1/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
using ConsoleApp1;

// Sample page used to exercise the index.
var pageUrl = "https://www.baidu.com";
var pageTitle = "百度一下你就知道";
var pageKeywords = "";
var pageDescription = "全球领先的中文搜索引擎、致力于让网民更便捷地获取信息,找到所求。百度超过千亿的中文网页数据库,可以瞬间找到相关的搜索结果。";

// The same page is written twice with identical arguments before searching.
for (var pass = 0; pass < 2; pass++)
{
    LuceneHelper.WriteDocument(pageUrl, pageTitle, pageKeywords, pageDescription);
}

LuceneHelper.Search("百度中文搜索");
8 changes: 4 additions & 4 deletions Segmenter/Common/Extensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ public static bool IsNotEmpty<T>(this IEnumerable<T> enumerable)
return (enumerable != null) && enumerable.Any();
}

/// <summary>
/// Returns the value stored under <paramref name="key"/>, or the default value of
/// <typeparamref name="TValue"/> when the key is not present.
/// </summary>
/// <param name="d">The dictionary to read from.</param>
/// <param name="key">The key to look up.</param>
/// <returns>The mapped value, or <c>default(TValue)</c> if the key is missing.</returns>
public static TValue GetValueOrDefault<TKey, TValue>(this IDictionary<TKey, TValue> d, TKey key)
{
    // A single TryGetValue replaces the ContainsKey + indexer pair, which looked the key up twice.
    return d.TryGetValue(key, out var value) ? value : default(TValue);
}
//public static TValue GetValueOrDefault<TKey, TValue>(this IDictionary<TKey, TValue> d, TKey key)
//{
// return d.ContainsKey(key) ? d[key] : default(TValue);
//}

public static TValue GetDefault<TKey, TValue>(this IDictionary<TKey, TValue> dict, TKey key, TValue defaultValue)
{
Expand Down
2 changes: 1 addition & 1 deletion Segmenter/Segmenter.csproj
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<TargetFramework>netstandard2.1</TargetFramework>
<PackageId>Lucene.JIEba.Segment</PackageId>
<PackageVersion>1.0.0</PackageVersion>
<Authors>SilentCC</Authors>
Expand Down
2 changes: 1 addition & 1 deletion Test/Test.csproj
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>netcoreapp2.2</TargetFramework>
<TargetFramework>netcoreapp3.1</TargetFramework>
<IsPackable>false</IsPackable>
</PropertyGroup>

Expand Down
21 changes: 15 additions & 6 deletions jieba.NET.sln
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.26228.4
# Visual Studio Version 17
VisualStudioVersion = 17.2.32630.192
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "jieba.NET", "jieba.NET\jieba.NET.csproj", "{89EFA758-206C-4681-ACF6-6F2AB2415279}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "jieba.NET", "jieba.NET\jieba.NET.csproj", "{89EFA758-206C-4681-ACF6-6F2AB2415279}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Analyser", "Analyser\Analyser.csproj", "{4F0DEF27-C5FE-448F-9B08-F8C2254A1075}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Analyser", "Analyser\Analyser.csproj", "{4F0DEF27-C5FE-448F-9B08-F8C2254A1075}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Segmenter", "Segmenter\Segmenter.csproj", "{C564CDCB-B52B-455E-86E9-FC0DAE37EF08}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Segmenter", "Segmenter\Segmenter.csproj", "{C564CDCB-B52B-455E-86E9-FC0DAE37EF08}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Test", "Test\Test.csproj", "{37B4E86E-2759-47CE-A59A-4301EE500BD8}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Test", "Test\Test.csproj", "{37B4E86E-2759-47CE-A59A-4301EE500BD8}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ConsoleApp1", "ConsoleApp1\ConsoleApp1.csproj", "{2D841415-1FFF-414B-9A51-50F97D670E5F}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand All @@ -33,8 +35,15 @@ Global
{37B4E86E-2759-47CE-A59A-4301EE500BD8}.Debug|Any CPU.Build.0 = Debug|Any CPU
{37B4E86E-2759-47CE-A59A-4301EE500BD8}.Release|Any CPU.ActiveCfg = Release|Any CPU
{37B4E86E-2759-47CE-A59A-4301EE500BD8}.Release|Any CPU.Build.0 = Release|Any CPU
{2D841415-1FFF-414B-9A51-50F97D670E5F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{2D841415-1FFF-414B-9A51-50F97D670E5F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{2D841415-1FFF-414B-9A51-50F97D670E5F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{2D841415-1FFF-414B-9A51-50F97D670E5F}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {BB0FB2D0-F6BA-4D78-A12D-37BBDDAD8BE0}
EndGlobalSection
EndGlobal
14 changes: 7 additions & 7 deletions jieba.NET/jieba.NET.csproj
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<PackageId>Lucene.JIEba.net</PackageId>
<TargetFramework>netstandard2.1</TargetFramework>
<PackageId>Lucene.new.Jieba.Net</PackageId>
<PackageVersion>1.1.1</PackageVersion>
<Authors>SilentCC</Authors>
<Description>JIEba.Lucene.Net is an analyzer tools for lucene.net which is kind to chinese</Description>
<Description>JIEba.Lucene.Net is an analyzer tools for lucene.net which is kind to chinese,fork from https://github.com/SilentCC/JIEba-netcore2.0</Description>
<PackageRequireLicenseAcceptance>false</PackageRequireLicenseAcceptance>
<PackageProjectUrl>https://github.com/SilentCC/JIEba-netcore2.0/</PackageProjectUrl>
<Copyright>Copyright 2019 (c) AgileLabs. All rights reserved.</Copyright>
Expand All @@ -17,10 +17,10 @@
<ItemGroup>
<PackageReference Include="Lucene.JIEba.Analyzer" Version="1.0.0" />
<PackageReference Include="Lucene.JIEba.Segment" Version="1.0.1" />
<PackageReference Include="Lucene.Net" Version="4.8.0-beta00005" />
<PackageReference Include="Lucene.Net.Analysis.Common" Version="4.8.0-beta00005" />
<PackageReference Include="System.Text.Encoding.CodePages" Version="4.4.0" />
<PackageReference Include="Microsoft.Extensions.FileProviders.Embedded" Version="1.0.0" />
<PackageReference Include="Lucene.Net" Version="4.8.0-beta00016" />
<PackageReference Include="Lucene.Net.Analysis.Common" Version="4.8.0-beta00016" />
<PackageReference Include="System.Text.Encoding.CodePages" Version="6.0.0" />
<PackageReference Include="Microsoft.Extensions.FileProviders.Embedded" Version="6.0.8" />
</ItemGroup>
<ItemGroup>
<None Remove="stopwords.txt" />
Expand Down