Skip to content

Commit 19c380d

Browse files
committed
New fields to index.
1 parent a4f64e6 commit 19c380d

File tree

3 files changed

+19
-3
lines changed

3 files changed

+19
-3
lines changed

ahmia/search_indexes.py

+4
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,10 @@ class Website(indexes.SearchIndex, indexes.Indexable):
99
tor2web_url = indexes.CharField(model_attr='tor2web_url')
1010
text = indexes.CharField(document=True, use_template=True)
1111
title = indexes.CharField(model_attr='title')
12+
h1 = indexes.CharField(model_attr='h1')
13+
h2 = indexes.CharField(model_attr='h2')
14+
crawling_session = indexes.CharField(model_attr='crawling_session')
15+
server_header = indexes.CharField(model_attr='server_header')
1216
date_inserted = indexes.DateTimeField(model_attr='date_inserted')
1317
content_auto = indexes.EdgeNgramField(model_attr='content')
1418

ahmia/templates/search/indexes/ahmia/websiteindex_text.txt

+4
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,10 @@
11
{{ object.title }}
22
{{ object.url }}
33
{{ object.text }}
4+
{{ object.h1 }}
5+
{{ object.h2 }}
6+
{{ object.crawling_session }}
7+
{{ object.server_header }}
48
{{ object.date_inserted }}
59
{{ object.tor2web_url }}
610
{{ object.domain }}

solr/schema.xml

+11 -3
Original file line number | Diff line number | Diff line change
@@ -148,18 +148,26 @@
148148

149149
<field name="domain" type="text_en" indexed="true" stored="true" multiValued="false" />
150150

151-
<field name="tor2web_url" type="text_en" indexed="true" stored="true" multiValued="false" />
152-
153151
<field name="title" type="text_en" indexed="true" stored="true" multiValued="false" />
154152

155153
<field name="url" type="text_en" indexed="true" stored="true" multiValued="false" />
156154

157155
<field name="text" type="text_en" indexed="true" stored="true" multiValued="false" />
158156

159-
<field name="content_auto" type="edge_ngram" indexed="true" stored="true" multiValued="false" />
157+
<field name="h1" type="text_en" indexed="true" stored="true" multiValued="false" />
158+
159+
<field name="server_header" type="text_en" indexed="true" stored="true" multiValued="false" />
160+
161+
<field name="h2" type="text_en" indexed="true" stored="true" multiValued="false" />
160162

161163
<field name="date_inserted" type="date" indexed="true" stored="true" multiValued="false" />
162164

165+
<field name="tor2web_url" type="text_en" indexed="true" stored="true" multiValued="false" />
166+
167+
<field name="content_auto" type="edge_ngram" indexed="true" stored="true" multiValued="false" />
168+
169+
<field name="crawling_session" type="text_en" indexed="true" stored="true" multiValued="false" />
170+
163171
</fields>
164172

165173
<!-- field to use to determine and enforce document uniqueness. -->

0 commit comments

Comments
 (0)