aboutsummaryrefslogtreecommitdiffstats
path: root/spec/models/xapian_spec.rb
blob: 7aab9cdc627f9221adafb288c2128ccba61bb970 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
# encoding: utf-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')

# Specs checking that User records can be found through the Xapian index,
# both by name and by the free-text "about me" field.
describe User, " when indexing users with Xapian" do

    before(:each) do
        load_raw_emails_data
        get_fixtures_xapian_index
    end

    it "should search by name" do
        search = ActsAsXapian::Search.new([User], "Silly", :limit => 100)
        hits = search.results
        hits.size.should == 1
        hits[0][:model].should == users(:silly_name_user)
    end

    it "should search by 'about me' text" do
        bob = users(:bob_smith_user)

        # Before the edit, "stuff" is expected to match exactly this user.
        search = ActsAsXapian::Search.new([User], "stuff", :limit => 100)
        hits = search.results
        hits.size.should == 1
        hits[0][:model].should == bob

        # Replace the profile text and bring the index up to date.
        bob.about_me = "I am really an aardvark, true story."
        bob.save!
        update_xapian_index

        # The old word must no longer match anything...
        search = ActsAsXapian::Search.new([User], "stuff", :limit => 100)
        search.results.size.should == 0

        # ...while a word from the new text must match the same user.
        search = ActsAsXapian::Search.new([User], "aardvark", :limit => 100)
        hits = search.results
        hits.size.should == 1
        hits[0][:model].should == bob
    end
end

# Specs for full-text search over PublicBody records in the Xapian index,
# including removal of a body from the index once it has been destroyed.
describe PublicBody, " when indexing public bodies with Xapian" do
    before(:each) do
        load_raw_emails_data
        get_fixtures_xapian_index
    end

    it "should search index the main name field" do
        xapian_object = ActsAsXapian::Search.new([PublicBody], "humpadinking", :limit => 100)
        xapian_object.results.size.should == 1
        xapian_object.results[0][:model].should == public_bodies(:humpadink_public_body)
    end

    it "should search index the notes field" do
        xapian_object = ActsAsXapian::Search.new([PublicBody], "albatross", :limit => 100)
        xapian_object.results.size.should == 1
        xapian_object.results[0][:model].should == public_bodies(:humpadink_public_body)
    end

    it "should delete public bodies from the index when they are destroyed" do
        # Precondition: the body about to be destroyed must be findable with
        # the very query that is re-run afterwards; otherwise the final empty
        # result would pass even if removal from the index were broken.
        # NOTE(review): assumes "lonely" matches only the forlorn_public_body
        # fixture - confirm against the fixtures.
        xapian_object = ActsAsXapian::Search.new([PublicBody], "lonely", :limit => 100)
        xapian_object.results.size.should == 1
        xapian_object.results[0][:model].should == public_bodies(:forlorn_public_body)

        public_bodies(:forlorn_public_body).destroy

        update_xapian_index
        xapian_object = ActsAsXapian::Search.new([PublicBody], "lonely", :limit => 100)
        xapian_object.results.should == []
    end

end

# Specs for the "requested_from:" search prefix, which looks up
# InfoRequestEvent records by the URL name of the public body the request
# was made to, and for keeping that term current when the URL name changes.
describe PublicBody, " when indexing requests by body they are to" do

    before(:each) do
        load_raw_emails_data
        get_fixtures_xapian_index
    end

    it "should find requests to the body" do
        xapian_object = ActsAsXapian::Search.new([InfoRequestEvent], "requested_from:tgq", :limit => 100)
        xapian_object.results.size.should == PublicBody.find_by_url_name("tgq").info_requests.map(&:info_request_events).flatten.size
    end

    it "should update index correctly when URL name of body changes" do
        # initial search
        xapian_object = ActsAsXapian::Search.new([InfoRequestEvent], "requested_from:tgq", :limit => 100)
        xapian_object.results.size.should == PublicBody.find_by_url_name("tgq").info_requests.map(&:info_request_events).flatten.size
        models_found_before = xapian_object.results.map { |x| x[:model] }

        # change the URL name of the body
        body = public_bodies(:geraldine_public_body)
        body.short_name = 'GQ'
        body.save!
        body.url_name.should == 'gq'
        update_xapian_index

        # check we get results expected: nothing under the old URL name, and
        # the same events as before under the new one
        xapian_object = ActsAsXapian::Search.new([InfoRequestEvent], "requested_from:tgq", :limit => 100)
        xapian_object.results.size.should == 0
        xapian_object = ActsAsXapian::Search.new([InfoRequestEvent], "requested_from:gq", :limit => 100)
        xapian_object.results.size.should == PublicBody.find_by_url_name("gq").info_requests.map(&:info_request_events).flatten.size
        models_found_after = xapian_object.results.map { |x| x[:model] }

        models_found_before.should == models_found_after
    end

    # if you index via the Xapian TermGenerator, it ignores terms of this length,
    # this checks we're using Document::add_term() instead
    it "should work with URL names that are longer than 64 characters" do
        # change the URL name of the body
        body = public_bodies(:geraldine_public_body)
        body.short_name = 'The Uncensored, Complete Name of the Quasi-Autonomous Public Body Also Known As Geraldine'
        body.save!
        body.url_name.size.should > 70
        update_xapian_index

        # check we get results expected: only the new, long URL name matches
        xapian_object = ActsAsXapian::Search.new([InfoRequestEvent], "requested_from:tgq", :limit => 100)
        xapian_object.results.size.should == 0
        xapian_object = ActsAsXapian::Search.new([InfoRequestEvent], "requested_from:gq", :limit => 100)
        xapian_object.results.size.should == 0
        xapian_object = ActsAsXapian::Search.new([InfoRequestEvent], "requested_from:#{body.url_name}", :limit => 100)
        xapian_object.results.size.should == public_bodies(:geraldine_public_body).info_requests.map(&:info_request_events).flatten.size
    end
end

# Specs for the "requested_by:" search prefix, which looks up
# InfoRequestEvent records by the URL name of the requesting user, and for
# keeping that term current when a request changes owner or a user is renamed.
describe User, " when indexing requests by user they are from" do
    before(:each) do
        load_raw_emails_data
        get_fixtures_xapian_index
    end

    it "should find requests from the user" do
        search = ActsAsXapian::Search.new([InfoRequestEvent], "requested_by:bob_smith",
            :sort_by_prefix => 'created_at', :sort_by_ascending => true, :limit => 100)
        found_events = search.results.map { |hit| hit[:model] }
        found_events.should =~ InfoRequestEvent.all(:conditions => "info_request_id in (select id from info_requests where user_id = #{users(:bob_smith_user).id})")
    end

    it "should find just the sent message events from a particular user" do
        search = ActsAsXapian::Search.new([InfoRequestEvent], "requested_by:bob_smith variety:sent",
            :sort_by_prefix => 'created_at', :sort_by_ascending => true, :limit => 100)
        hits = search.results
        hits.map { |hit| hit[:model] }.should =~ InfoRequestEvent.all(:conditions => "info_request_id in (select id from info_requests where user_id = #{users(:bob_smith_user).id}) and event_type = 'sent'")
        # Spot-check the position of two known events in the sorted results.
        hits[2][:model].should == info_request_events(:useless_outgoing_message_event)
        hits[1][:model].should == info_request_events(:silly_outgoing_message_event)
    end

    it "should not find it when one of the request's users is changed" do
        # Hand one request over to a different user, then reindex.
        new_owner = users(:silly_name_user)
        reassigned_request = info_requests(:naughty_chicken_request)
        reassigned_request.user = new_owner
        reassigned_request.save!

        update_xapian_index

        # Collapsed to one hit per request, the search should now return
        # exactly the requests the database says the original user owns.
        search = ActsAsXapian::Search.new([InfoRequestEvent], "requested_by:bob_smith",
            :sort_by_prefix => 'created_at', :sort_by_ascending => true,
            :collapse_by_prefix => 'request_collapse', :limit => 100)
        found_requests = search.results.map { |hit| hit[:model].info_request }
        found_requests.should =~ InfoRequest.all(:conditions => "user_id = #{users(:bob_smith_user).id}")
    end

    it "should not get confused searching for requests when one user has a name which has same stem as another" do
        # Give two users URL names where one is a prefix of the other.
        longer_named_user = users(:bob_smith_user)
        longer_named_user.name = "John King"
        longer_named_user.url_name.should == 'john_king'
        longer_named_user.save!

        shorter_named_user = users(:silly_name_user)
        shorter_named_user.name = "John K"
        shorter_named_user.url_name.should == 'john_k'
        shorter_named_user.save!

        reassigned_request = info_requests(:naughty_chicken_request)
        reassigned_request.user = shorter_named_user
        reassigned_request.save!

        update_xapian_index

        # Only the shorter-named user's single event may match.
        search = ActsAsXapian::Search.new([InfoRequestEvent], "requested_by:john_k", :limit => 100)
        hits = search.results
        hits.size.should == 1
        hits[0][:model].should == info_request_events(:silly_outgoing_message_event)
    end


    it "should update index correctly when URL name of user changes" do
        # initial search under the original URL name
        search = ActsAsXapian::Search.new([InfoRequestEvent], "requested_by:bob_smith",
            :sort_by_prefix => 'created_at', :sort_by_ascending => true, :limit => 100)
        events_before = search.results.map { |hit| hit[:model] }
        events_before.should =~ InfoRequestEvent.all(:conditions => "info_request_id in (select id from info_requests where user_id = #{users(:bob_smith_user).id})")

        # change the URL name of the user
        renamed_user = users(:bob_smith_user)
        renamed_user.name = 'Robert Smith'
        renamed_user.save!
        renamed_user.url_name.should == 'robert_smith'
        update_xapian_index

        # the old URL name finds nothing; the new one finds the same events
        # in the same order
        search = ActsAsXapian::Search.new([InfoRequestEvent], "requested_by:bob_smith", :limit => 100)
        search.results.size.should == 0
        search = ActsAsXapian::Search.new([InfoRequestEvent], "requested_by:robert_smith",
            :sort_by_prefix => 'created_at', :sort_by_ascending => true, :limit => 100)
        events_after = search.results.map { |hit| hit[:model] }
        events_before.should == events_after
    end
end

# Specs for the "commented_by:" search prefix, which looks up
# InfoRequestEvent records by the URL name of the commenting user.
describe User, " when indexing comments by user they are by" do
    before(:each) do
        load_raw_emails_data
        get_fixtures_xapian_index
    end

    it "should find requests from the user" do
        search = ActsAsXapian::Search.new([InfoRequestEvent], "commented_by:silly_emnameem", :limit => 100)
        search.results.size.should == 1
    end

    it "should update index correctly when URL name of user changes" do
        # initial search under the original URL name
        search = ActsAsXapian::Search.new([InfoRequestEvent], "commented_by:silly_emnameem", :limit => 100)
        hits = search.results
        hits.size.should == 1
        events_before = hits.map { |hit| hit[:model] }

        # change the URL name of the user
        renamed_user = users(:silly_name_user)
        renamed_user.name = 'Silly Name'
        renamed_user.save!
        renamed_user.url_name.should == 'silly_name'
        update_xapian_index

        # the old URL name finds nothing; the new one finds the same event
        search = ActsAsXapian::Search.new([InfoRequestEvent], "commented_by:silly_emnameem", :limit => 100)
        search.results.size.should == 0
        search = ActsAsXapian::Search.new([InfoRequestEvent], "commented_by:silly_name", :limit => 100)
        hits = search.results
        hits.size.should == 1
        events_after = hits.map { |hit| hit[:model] }

        events_before.should == events_after
    end
end

# Specs for the "request:" search prefix, which looks up InfoRequestEvent
# records by the URL title of the request they belong to.
describe InfoRequest, " when indexing requests by their title" do
    before(:each) do
        load_raw_emails_data
        get_fixtures_xapian_index
    end

    it "should find events for the request" do
        xapian_object = ActsAsXapian::Search.new([InfoRequestEvent], "request:how_much_public_money_is_wasted_o", :limit => 100)
        xapian_object.results.size.should == 1
        # `.should` is required here: a bare `==` is evaluated and discarded,
        # so it would never fail the spec.
        xapian_object.results[0][:model].should == info_request_events(:silly_outgoing_message_event)
    end

    it "should update index correctly when URL title of request changes" do
        # change the title (and with it the URL title) of the request
        ir = info_requests(:naughty_chicken_request)
        ir.title = 'Really naughty'
        ir.save!
        ir.url_title.should == 'really_naughty'
        update_xapian_index

        # check we get results expected: nothing under the old URL title, the
        # same event under the new one
        xapian_object = ActsAsXapian::Search.new([InfoRequestEvent], "request:how_much_public_money_is_wasted_o", :limit => 100)
        xapian_object.results.size.should == 0
        xapian_object = ActsAsXapian::Search.new([InfoRequestEvent], "request:really_naughty", :limit => 100)
        xapian_object.results.size.should == 1
        xapian_object.results[0][:model].should == info_request_events(:silly_outgoing_message_event)
    end
end

# Specs for the "tag:" search prefix applied to InfoRequestEvent records,
# checking that a tag set on a request after the initial index is picked up.
describe InfoRequest, " when indexing requests by tag" do
    before(:each) do
        load_raw_emails_data
        get_fixtures_xapian_index
    end

    it "should find request by tag, even when changes" do
        ir = info_requests(:naughty_chicken_request)
        ir.tag_string = 'bunnyrabbit'
        ir.save!
        update_xapian_index

        xapian_object = ActsAsXapian::Search.new([InfoRequestEvent], "tag:bunnyrabbit", :limit => 100)
        xapian_object.results.size.should == 1
        # `.should` is required here: a bare `==` is evaluated and discarded,
        # so it would never fail the spec.
        xapian_object.results[0][:model].should == info_request_events(:silly_outgoing_message_event)

        # A tag that was never assigned must not match anything.
        xapian_object = ActsAsXapian::Search.new([InfoRequestEvent], "tag:orangeaardvark", :limit => 100)
        xapian_object.results.size.should == 0
    end
end

# Specs for the "tag:" search prefix applied to PublicBody records, covering
# both the bare tag name and the "name:value" form.
describe PublicBody, " when indexing authorities by tag" do
    before(:each) do
        load_raw_emails_data
        get_fixtures_xapian_index
    end

    it "should find request by tag, even when changes" do
        body = public_bodies(:geraldine_public_body)
        body.tag_string = 'mice:3'
        body.save!
        update_xapian_index

        # `.should` is required on the model comparisons below: a bare `==`
        # is evaluated and discarded, so it would never fail the spec.
        xapian_object = ActsAsXapian::Search.new([PublicBody], "tag:mice", :limit => 100)
        xapian_object.results.size.should == 1
        xapian_object.results[0][:model].should == public_bodies(:geraldine_public_body)
        xapian_object = ActsAsXapian::Search.new([PublicBody], "tag:mice:3", :limit => 100)
        xapian_object.results.size.should == 1
        xapian_object.results[0][:model].should == public_bodies(:geraldine_public_body)

        # A tag that was never assigned must not match anything.
        xapian_object = ActsAsXapian::Search.new([PublicBody], "tag:orangeaardvark", :limit => 100)
        xapian_object.results.size.should == 0
    end
end

# Spec for partial index rebuilds: rebuild_xapian_index is called four times
# with different combinations of (terms, values, texts, dropfirst), and after
# each call three searches (by tag, by free text, by variety) check which
# kinds of data ended up in the index.
describe PublicBody, " when only indexing selected things on a rebuild" do
    before(:each) do
        load_raw_emails_data
        get_fixtures_xapian_index
    end

    it "should only index what we ask it to" do
        # Give one body a tag and a distinctive name so that tag search and
        # free-text search each have something unique to look for.
        body = public_bodies(:geraldine_public_body)
        body.tag_string = 'mice:3'
        body.name = 'frobzn'
        body.save!
        # only reindex 'variety' term
        dropfirst = true
        terms = "V"
        values = false
        texts = false
        rebuild_xapian_index(terms, values, texts, dropfirst)
        # Tag and text searches find nothing; the variety search finds every body.
        xapian_object = ActsAsXapian::Search.new([PublicBody], "tag:mice", :limit => 100)
        xapian_object.results.size.should == 0
        xapian_object = ActsAsXapian::Search.new([PublicBody], "frobzn", :limit => 100)
        xapian_object.results.size.should == 0
        xapian_object = ActsAsXapian::Search.new([PublicBody], "variety:authority", :limit => 100)
        xapian_object.results.map{|x|x[:model]}.should =~ PublicBody.all
        # only reindex 'tag' and text
        dropfirst = true
        terms = "U"
        values = false
        texts = true
        rebuild_xapian_index(terms, values, texts, dropfirst)
        # Now tag and text searches succeed, and the variety search finds nothing.
        xapian_object = ActsAsXapian::Search.new([PublicBody], "tag:mice", :limit => 100)
        xapian_object.results.size.should == 1
        xapian_object = ActsAsXapian::Search.new([PublicBody], "frobzn", :limit => 100)
        xapian_object.results.size.should == 1
        xapian_object = ActsAsXapian::Search.new([PublicBody], "variety:authority", :limit => 100)
        xapian_object.results.size.should == 0
        # only reindex 'variety' term, but keeping the existing data in-place
        # (`values` is not reassigned here; it is still false from above)
        dropfirst = false
        terms = "V"
        texts = false
        rebuild_xapian_index(terms, values, texts, dropfirst)
        # All three searches succeed: the earlier tag/text data was kept.
        xapian_object = ActsAsXapian::Search.new([PublicBody], "tag:mice", :limit => 100)
        xapian_object.results.size.should == 1
        xapian_object = ActsAsXapian::Search.new([PublicBody], "frobzn", :limit => 100)
        xapian_object.results.size.should == 1
        xapian_object = ActsAsXapian::Search.new([PublicBody], "variety:authority", :limit => 100)
        xapian_object.results.map{|x|x[:model]}.should =~ PublicBody.all
        # only reindex 'variety' term, blowing away existing data
        # (terms, values and texts keep their values from the previous step)
        dropfirst = true
        rebuild_xapian_index(terms, values, texts, dropfirst)
        # Back to the state after the first rebuild: only the variety search matches.
        xapian_object = ActsAsXapian::Search.new([PublicBody], "tag:mice", :limit => 100)
        xapian_object.results.size.should == 0
        xapian_object = ActsAsXapian::Search.new([PublicBody], "frobzn", :limit => 100)
        xapian_object.results.size.should == 0
        xapian_object = ActsAsXapian::Search.new([PublicBody], "variety:authority", :limit => 100)
        xapian_object.results.map{|x|x[:model]}.should =~ PublicBody.all
    end
end

# I would expect ActsAsXapian to have some tests under vendor/plugins/acts_as_xapian, but
# it looks like this is not the case. Putting a test here instead.
describe ActsAsXapian::Search, "#words_to_highlight" do
    before(:each) do
        load_raw_emails_data
        get_fixtures_xapian_index
    end

    it "should return a list of words used in the search" do
        search = ActsAsXapian::Search.new([PublicBody], "albatross words", :limit => 100)
        highlighted = search.words_to_highlight
        highlighted.should == ["albatross", "words"]
    end

    # Prefixed operators such as "tag:mice" are not words to highlight.
    it "should remove any operators" do
        search = ActsAsXapian::Search.new([PublicBody], "albatross words tag:mice", :limit => 100)
        highlighted = search.words_to_highlight
        highlighted.should == ["albatross", "words"]
    end

    # This is the current behaviour but it seems a little simplistic to me
    it "should separate punctuation" do
        search = ActsAsXapian::Search.new([PublicBody], "The doctor's patient", :limit => 100)
        highlighted = search.words_to_highlight
        highlighted.should == ["The", "doctor", "s", "patient"]
    end

    it "should handle non-ascii characters" do
        search = ActsAsXapian::Search.new([PublicBody], "adatigénylés words tag:mice", :limit => 100)
        highlighted = search.words_to_highlight
        highlighted.should == ["adatigénylés", "words"]
    end

end