XRootD
Loading...
Searching...
No Matches
XrdOssCsiPagesUnaligned.cc
Go to the documentation of this file.
1/******************************************************************************/
2/* */
3/* X r d O s s C s i P a g e s U n a l i g n e d . c c */
4/* */
5/* (C) Copyright 2021 CERN. */
6/* */
7/* This file is part of the XRootD software suite. */
8/* */
9/* XRootD is free software: you can redistribute it and/or modify it under */
10/* the terms of the GNU Lesser General Public License as published by the */
11/* Free Software Foundation, either version 3 of the License, or (at your */
12/* option) any later version. */
13/* */
14/* In applying this licence, CERN does not waive the privileges and */
15/* immunities granted to it by virtue of its status as an Intergovernmental */
16/* Organization or submit itself to any jurisdiction. */
17/* */
18/* XRootD is distributed in the hope that it will be useful, but WITHOUT */
19/* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or */
20/* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public */
21/* License for more details. */
22/* */
23/* You should have received a copy of the GNU Lesser General Public License */
24/* along with XRootD in a file called COPYING.LESSER (LGPL license) and file */
25/* COPYING (GPL license). If not, see <http://www.gnu.org/licenses/>. */
26/* */
27/* The copyright holder's institutional names and contributor's names may not */
28/* be used to endorse or promote products derived from this software without */
29/* specific prior written permission of the institution or contributor. */
30/******************************************************************************/
31
32#include "XrdOssCsiTrace.hh"
33#include "XrdOssCsiPages.hh"
34#include "XrdOssCsiCrcUtils.hh"
35#include "XrdOuc/XrdOucCRC.hh"
37
38#include <vector>
39#include <assert.h>
40
43
44//
45// UpdateRangeHoleUntilPage
46//
47// Used pgWrite/Write (both aligned and unaligned cases) when extending a file
48// with implied zeros after then current end of file and the new one.
49// fd (data file descriptor pointer) required only when last page in file is partial.
50// current implementation does not use fd in this case, but requires it be set.
51//
52int XrdOssCsiPages::UpdateRangeHoleUntilPage(XrdOssDF *fd, const off_t until, const Sizes_t &sizes)
53{
54 EPNAME("UpdateRangeHoleUntilPage");
55
56 static const uint32_t crczero = CrcUtils.crc32c_extendwith_zero(0u, XrdSys::PageSize);
57 static const std::vector<uint32_t> crc32Vec(stsize_, crczero);
58 static const std::vector<uint32_t> crc32VecZ(stsize_, 0u);
59
60 const off_t trackinglen = sizes.first;
61 const off_t tracked_page = trackinglen / XrdSys::PageSize;
62 if (until <= tracked_page) return 0;
63
64 const size_t tracked_off = trackinglen % XrdSys::PageSize;
65
66 // if last tracked page is before page "until" extend it
67 if (tracked_off>0)
68 {
69 if (fd == NULL)
70 {
71 TRACE(Warn, "Unexpected partially filled last page " << fn_);
72 return -EDOM;
73 }
74
75 uint32_t prevtag;
76 const ssize_t rret = ts_->ReadTags(&prevtag, tracked_page, 1);
77 if (rret < 0)
78 {
79 TRACE(Warn, TagsReadError(tracked_page, 1, rret));
80 return rret;
81 }
82
83 // extend prevtag up to PageSize. If there is a mismatch it will only be
84 // discovered during a later read (but this saves a read now).
85 const uint32_t crc32c = CrcUtils.crc32c_extendwith_zero(prevtag, XrdSys::PageSize - tracked_off);
86 const ssize_t wret = ts_->WriteTags(&crc32c, tracked_page, 1);
87 if (wret < 0)
88 {
89 TRACE(Warn, TagsWriteError(tracked_page, 1, wret) << " (prev)");
90 return wret;
91 }
92 }
93
94 const off_t nAllEmpty = (tracked_off>0) ? (until - tracked_page - 1) : (until - tracked_page);
95 const off_t firstEmpty = (tracked_off>0) ? (tracked_page + 1) : tracked_page;
96
97 off_t towrite = nAllEmpty;
98 off_t nwritten = 0;
99 while(towrite>0)
100 {
101 off_t wblks = 0;
102 const uint32_t *wpointer = 0;
103 // unless we are configured with nofill (!nofill means writeHoles_ is true), we fill
104 // in the crc for an implied zero block. Reads from this hole will be valid, which
105 // is the usual behavior for sparse files. If nofill is configured it is assumed that the
106 // user knows the application does not expect to make such reads, so we set a zero tag
107 // so there will be a mismatch to catch the situation; we write the zero tag explicitly
108 // so that the tag file itself is not sparse.
109 if (writeHoles_)
110 {
111 wblks = (off_t)crc32Vec.size();
112 wpointer = &crc32Vec[0];
113 }
114 else
115 {
116 wblks = (off_t)crc32VecZ.size();
117 wpointer = &crc32VecZ[0];
118 }
119 const size_t nw = std::min(towrite, wblks);
120 const ssize_t wret = ts_->WriteTags(wpointer, firstEmpty+nwritten, nw);
121 if (wret<0)
122 {
123 TRACE(Warn, TagsWriteError(firstEmpty+nwritten, nw, wret) << " (new)");
124 return wret;
125 }
126 towrite -= wret;
127 nwritten += wret;
128 }
129
130 return 0;
131}
132
133// UpdateRangeUnaligned
134//
135// Used by Write for various cases with mis-alignment that need checksum recalculation. See StoreRangeUnaligned for list of conditions.
136//
137int XrdOssCsiPages::UpdateRangeUnaligned(XrdOssDF *const fd, const void *buff, const off_t offset, const size_t blen, const Sizes_t &sizes)
138{
139 return StoreRangeUnaligned(fd, buff, offset, blen, sizes, NULL);
140}
141
142//
143// used by StoreRangeUnaligned when the supplied data does not cover the whole of the first corresponding page in the file
144//
145// offset: offset in file for start of write
146// blen: length of write in first page
147//
148int XrdOssCsiPages::StoreRangeUnaligned_preblock(XrdOssDF *const fd, const void *const buff, const size_t blen,
149 const off_t offset, const off_t trackinglen,
150 const uint32_t *const csvec, uint32_t &prepageval)
151{
152 EPNAME("StoreRangeUnaligned_preblock");
153 const off_t p1 = offset / XrdSys::PageSize;
154 const size_t p1_off = offset % XrdSys::PageSize;
155
156 const off_t tracked_page = trackinglen / XrdSys::PageSize;
157 const size_t tracked_off = trackinglen % XrdSys::PageSize;
158
159 if (p1 > tracked_page)
160 {
161 // the start of will have a number of implied zero bytes
162 uint32_t crc32c = CrcUtils.crc32c_extendwith_zero(0u, p1_off);
163 if (csvec)
164 {
165 crc32c = CrcUtils.crc32c_combine(crc32c, csvec[0], blen);
166 }
167 else
168 {
169 crc32c = XrdOucCRC::Calc32C(buff, blen, crc32c);
170 }
171 prepageval = crc32c;
172 return 0;
173 }
174
175 // we're appending, or appending within the last page after a gap of zeros
176 if (p1 == tracked_page && p1_off >= tracked_off)
177 {
178 // appending: with or without some implied zeros.
179
180 // zero initialised value may be used
181 uint32_t crc32v = 0;
182 if (tracked_off > 0)
183 {
184 const ssize_t rret = ts_->ReadTags(&crc32v, p1, 1);
185 if (rret<0)
186 {
187 TRACE(Warn, TagsReadError(p1, 1, rret) << " (append)");
188 return rret;
189 }
190 }
191
192 uint32_t crc32c = 0;
193
194 // only do the loosewrite extending check one time for the page which was the
195 // last page according to the trackinglen at time the check was configured (open or size-resync).
196 // don't do the check every time because it needs an extra read compared to the non loose case;
197 // checklastpg_ is checked and modified here, but is protected from concurrent
198 // access because of the condition that p1==lastpgforloose_
199
201 {
202 checklastpg_ = false;
203 uint8_t b[XrdSys::PageSize];
204
205 // this will reissue read() until eof, or tracked_off bytes read but accept up to PageSize
206 const ssize_t rlen = XrdOssCsiPages::maxread(fd, b, XrdSys::PageSize * p1, XrdSys::PageSize, tracked_off);
207
208 if (rlen<0)
209 {
210 TRACE(Warn, PageReadError(tracked_off, p1, rlen));
211 return rlen;
212 }
213 memset(&b[rlen], 0, XrdSys::PageSize - rlen);
214
215 // in the loose-write mode, the new crc is based on the crc of data
216 // read from file up to p1_off, not on the previously stored tag.
217 // However must check if the data read were consistent with stored tag (crc32v)
218
219 uint32_t crc32x = XrdOucCRC::Calc32C(b, tracked_off, 0u);
220 crc32c = XrdOucCRC::Calc32C(&b[tracked_off], p1_off-tracked_off, crc32x);
221
222 do
223 {
224 if (static_cast<size_t>(rlen) == tracked_off)
225 {
226 // this is the expected match
227 if (tracked_off==0 || crc32x == crc32v) break;
228 }
229
230 // any bytes on disk beyond p1_off+blan would not be included in the new crc.
231 // if tracked_off==0 we have no meaningful crc32v value.
232 if ((tracked_off>0 || p1_off==0) && static_cast<size_t>(rlen) <= p1_off+blen)
233 {
234
235 if (tracked_off != 0)
236 {
237 TRACE(Warn, CRCMismatchError(tracked_off, p1, crc32x, crc32v) << " (loose match, still trying)");
238 }
239
240 // there was no tag recorded for the page, and we're completely overwriting anything on disk in the page
241 if (tracked_off==0)
242 {
243 TRACE(Warn, "Recovered page with no tag at offset " << (XrdSys::PageSize * p1) <<
244 " of file " << fn_ << " rlen=" << rlen << " (append)");
245 break;
246 }
247
248 if (static_cast<size_t>(rlen) != tracked_off && rlen>0)
249 {
250 crc32x = XrdOucCRC::Calc32C(b, rlen, 0u);
251 if (crc32x == crc32v)
252 {
253 TRACE(Warn, "Recovered page at offset " << (XrdSys::PageSize * p1)+p1_off << " of file " << fn_ << " (append)");
254 break;
255 }
256 TRACE(Warn, CRCMismatchError(rlen, p1, crc32x, crc32v) << " (loose match, still trying)");
257 }
258
259 memcpy(&b[p1_off], buff, blen);
260 crc32x = XrdOucCRC::Calc32C(b, p1_off+blen, 0u);
261 if (crc32x == crc32v)
262 {
263 TRACE(Warn, "Recovered matching write at offset " << (XrdSys::PageSize * p1)+p1_off <<
264 " of file " << fn_ << " (append)");
265 break;
266 }
267 TRACE(Warn, CRCMismatchError(p1_off+blen, p1, crc32x, crc32v) << " (append)");
268 }
269 else
270 {
271 if (tracked_off>0)
272 {
273 TRACE(Warn, CRCMismatchError(tracked_off, p1, crc32x, crc32v) << " (append)");
274 }
275 else
276 {
277 TRACE(Warn, "Unexpected content, write at page at offset " << (XrdSys::PageSize * p1) <<
278 " of file " << fn_ << ", offset-in-page=" << p1_off << " rlen=" << rlen << " (append)");
279 }
280 }
281 return -EDOM;
282 } while(0);
283 }
284 else
285 {
286 // non-loose case;
287 // can recalc crc with new data without re-reading existing partial block's data
288 const size_t nz = p1_off - tracked_off;
290 }
291
292 // crc32c is crc up to p1_off. Now add the user's data.
293 if (csvec)
294 {
295 crc32c = CrcUtils.crc32c_combine(crc32c, csvec[0], blen);
296 }
297 else
298 {
299 crc32c = XrdOucCRC::Calc32C(buff, blen, crc32c);
300 }
301 prepageval = crc32c;
302 return 0;
303 }
304
305 const size_t bavail = (p1==tracked_page) ? tracked_off : XrdSys::PageSize;
306
307 // assert we're overwriting some (or all) of the previous data (other case was above)
308 assert(p1_off < bavail);
309
310 // case p1_off==0 && blen>=bavail is either handled by aligned case (p1==tracked_page)
311 // or not sent to preblock, so will need to read some preexisting data
312 assert(p1_off !=0 || blen<bavail);
313 uint8_t b[XrdSys::PageSize];
314
315 uint32_t crc32v;
316 ssize_t rret = ts_->ReadTags(&crc32v, p1, 1);
317 if (rret<0)
318 {
319 TRACE(Warn, TagsReadError(p1, 1, rret) << " (overwrite)");
320 return rret;
321 }
322
323 // in either loosewrite or non-loosewrite a read-modify-write sequence is done and the
324 // final crc is that of the modified block. The difference between loose and non-loose
325 // case if that the looser checks are done on the block.
326 //
327 // in either case there are implicit verification(s) (e.g. pgWrite may return EDOM without Verify requested)
328 // as it's not clear if there is a meaningful way to crc a mismatching page during a partial overwrite
329
330 if (loosewrite_)
331 {
332 // this will reissue read() until eof, or bavail bytes read but accept up to PageSize
333 const ssize_t rlen = XrdOssCsiPages::maxread(fd, b, XrdSys::PageSize * p1, XrdSys::PageSize, bavail);
334 if (rlen<0)
335 {
336 TRACE(Warn, PageReadError(bavail, p1, rlen));
337 return rlen;
338 }
339 memset(&b[rlen], 0, XrdSys::PageSize - rlen);
340 do
341 {
342 uint32_t crc32c = XrdOucCRC::Calc32C(b, bavail, 0U);
343 // this is the expected case
344 if (static_cast<size_t>(rlen) == bavail && crc32c == crc32v) break;
345
346 // after this write there will be nothing changed between p1_off+blen
347 // and bavail; if there is nothing on disk in this range it will not
348 // be added by the write. So don't try to match crc with implied zero
349 // in this range. Beyond bavail bytes on disk will not be included
350 // in the new crc.
351 const size_t rmin = (p1_off+blen < bavail) ? bavail : 0;
352 if (static_cast<size_t>(rlen) >= rmin && static_cast<size_t>(rlen)<=bavail)
353 {
354 if (crc32c == crc32v)
355 {
356 TRACE(Warn, "Recovered page at offset " << (XrdSys::PageSize * p1) << " of file " << fn_ << " (overwrite)");
357 break;
358 }
359 TRACE(Warn, CRCMismatchError(bavail, p1, crc32c, crc32v) << " (loose match, still trying)");
360
361 if (static_cast<size_t>(rlen) != bavail && rlen > 0)
362 {
363 crc32c = XrdOucCRC::Calc32C(b, rlen, 0U);
364 if (crc32c == crc32v)
365 {
366 TRACE(Warn, "Recovered page (2) at offset " << (XrdSys::PageSize * p1) << " of file " << fn_ << " (overwrite)");
367 break;
368 }
369 TRACE(Warn, CRCMismatchError(rlen, p1, crc32c, crc32v) << " (loose match, still trying)");
370 }
371
372 memcpy(&b[p1_off], buff, blen);
373 const size_t vl = std::max(bavail, p1_off+blen);
374 crc32c = XrdOucCRC::Calc32C(b, vl, 0U);
375 if (crc32c == crc32v)
376 {
377 TRACE(Warn, "Recovered matching write at offset " << (XrdSys::PageSize * p1)+p1_off << " of file " << fn_ << " (overwrite)");
378 break;
379 }
380 TRACE(Warn, CRCMismatchError(vl, p1, crc32c, crc32v) << " (overwrite)");
381 }
382 else
383 {
384 TRACE(Warn, CRCMismatchError(bavail, p1, crc32c, crc32v) << " (overwrite)");
385 }
386 return -EDOM;
387 } while(0);
388 }
389 else
390 {
391 // non-loose case
392 rret = XrdOssCsiPages::fullread(fd, b, XrdSys::PageSize * p1, bavail);
393 if (rret<0)
394 {
395 TRACE(Warn, PageReadError(bavail, p1, rret));
396 return rret;
397 }
398 const uint32_t crc32c = XrdOucCRC::Calc32C(b, bavail, 0U);
399 if (crc32v != crc32c)
400 {
401 TRACE(Warn, CRCMismatchError(bavail, p1, crc32c, crc32v));
402 return -EDOM;
403 }
404 }
405
406 uint32_t crc32c = XrdOucCRC::Calc32C(b, p1_off, 0U);
407 if (csvec)
408 {
409 crc32c = CrcUtils.crc32c_combine(crc32c, csvec[0], blen);
410 }
411 else
412 {
413 crc32c = XrdOucCRC::Calc32C(buff, blen, crc32c);
414 }
415 if (p1_off+blen < bavail)
416 {
417 const uint32_t cl = XrdOucCRC::Calc32C(&b[p1_off+blen], bavail-p1_off-blen, 0U);
418 crc32c = CrcUtils.crc32c_combine(crc32c, cl, bavail-p1_off-blen);
419 }
420 prepageval = crc32c;
421 return 0;
422}
423
424//
425// used by StoreRangeUnaligned when the end of supplied data is not page aligned
426// and is before the end of file
427//
428// offset: first offset in file at which write is page aligned
429// blen: length of write after offset
430//
431int XrdOssCsiPages::StoreRangeUnaligned_postblock(XrdOssDF *const fd, const void *const buff, const size_t blen,
432 const off_t offset, const off_t trackinglen,
433 const uint32_t *const csvec, uint32_t &lastpageval)
434{
435 EPNAME("StoreRangeUnaligned_postblock");
436
437 const uint8_t *const p = (uint8_t*)buff;
438 const off_t p2 = (offset+blen) / XrdSys::PageSize;
439 const size_t p2_off = (offset+blen) % XrdSys::PageSize;
440
441 const off_t tracked_page = trackinglen / XrdSys::PageSize;
442 const size_t tracked_off = trackinglen % XrdSys::PageSize;
443
444 // we should not be called in this case
445 assert(p2_off != 0);
446
447 // how much existing data this last (p2) page
448 const size_t bavail = (p2==tracked_page) ? tracked_off : XrdSys::PageSize;
449
450 // how much of that data will not be overwritten
451 const size_t bremain = (p2_off < bavail) ? bavail-p2_off : 0;
452
453 // we should not be called if it is a complete overwrite
454 assert(bremain>0);
455
456 // need to use remaining data to calculate the crc of the new p2 page.
457 // read and verify it now.
458
459 uint32_t crc32v;
460 ssize_t rret = ts_->ReadTags(&crc32v, p2, 1);
461 if (rret<0)
462 {
463 TRACE(Warn, TagsReadError(p2, 1, rret));
464 return rret;
465 }
466
467 uint8_t b[XrdSys::PageSize];
468 rret = XrdOssCsiPages::fullread(fd, b, XrdSys::PageSize * p2, bavail);
469 if (rret<0)
470 {
471 TRACE(Warn, PageReadError(bavail, p2, rret));
472 return rret;
473 }
474
475 // calculate crc of new data with remaining data at the end:
476 uint32_t crc32c = 0;
477 if (csvec)
478 {
479 crc32c = csvec[(blen-1)/XrdSys::PageSize];
480 }
481 else
482 {
483 crc32c = XrdOucCRC::Calc32C(&p[blen-p2_off], p2_off, 0U);
484 }
485
486 const uint32_t cl = XrdOucCRC::Calc32C(&b[p2_off], bremain, 0U);
487 // crc of page with new data
488 crc32c = CrcUtils.crc32c_combine(crc32c, cl, bremain);
489 // crc of current page (before write)
490 const uint32_t crc32prev = XrdOucCRC::Calc32C(b, bavail, 0U);
491
492 // check(s) to see if remaining data was valid
493
494 // usual check; unmodified block is consistent with stored crc
495 // for loose write we allow case were the new crc has already been stored in the tagfile
496
497 // this may be an implicit verification (e.g. pgWrite may return EDOM without Verify requested)
498 // however, it's not clear if there is a meaningful way to crc a mismatching page during a partial overwrite
499 if (crc32v != crc32prev)
500 {
501 if (loosewrite_ && crc32c != crc32prev)
502 {
503 // log that we chceked if the tag was matching the previous data
504 TRACE(Warn, CRCMismatchError(bavail, p2, crc32prev, crc32v) << " (loose match, still trying)");
505 if (crc32c == crc32v)
506 {
507 TRACE(Warn, "Recovered matching write at offset " << (XrdSys::PageSize * p2) << " of file " << fn_);
508 lastpageval = crc32c;
509 return 0;
510 }
511 TRACE(Warn, CRCMismatchError(bavail, p2, crc32c, crc32v));
512 }
513 else
514 {
515 TRACE(Warn, CRCMismatchError(bavail, p2, crc32prev, crc32v));
516 }
517 return -EDOM;
518 }
519
520 lastpageval = crc32c;
521 return 0;
522}
523
524//
525// StoreRangeUnaligned
526//
527// Used by pgWrite or Write (via UpdateRangeUnaligned) where the start of this update is not page aligned within the file
528// OR where the end of this update is before the end of the file and is not page aligned
529// OR where end of the file is not page aligned and this update starts after it
530// i.e. where checksums of last current page of file, or the first or last pages after writing this buffer will need to be recomputed
531//
532int XrdOssCsiPages::StoreRangeUnaligned(XrdOssDF *const fd, const void *buff, const off_t offset, const size_t blen, const Sizes_t &sizes, const uint32_t *const csvec)
533{
534 EPNAME("StoreRangeUnaligned");
535 const off_t p1 = offset / XrdSys::PageSize;
536
537 const off_t trackinglen = sizes.first;
538 if (offset > trackinglen)
539 {
540 const int ret = UpdateRangeHoleUntilPage(fd, p1, sizes);
541 if (ret<0)
542 {
543 TRACE(Warn, "Error updating tags for holes, error=" << ret);
544 return ret;
545 }
546 }
547
548 const size_t p1_off = offset % XrdSys::PageSize;
549 const size_t p2_off = (offset+blen) % XrdSys::PageSize;
550
551 bool hasprepage = false;
552 uint32_t prepageval;
553
554 // deal with partial first page
555 if ( p1_off>0 || blen < static_cast<size_t>(XrdSys::PageSize) )
556 {
557 const size_t bavail = (XrdSys::PageSize-p1_off > blen) ? blen : (XrdSys::PageSize-p1_off);
558 const int ret = StoreRangeUnaligned_preblock(fd, buff, bavail, offset, trackinglen, csvec, prepageval);
559 if (ret<0)
560 {
561 return ret;
562 }
563 hasprepage = true;
564 }
565
566 // next page (if any)
567 const off_t np = hasprepage ? p1+1 : p1;
568 // next page starts at buffer offset
569 const size_t npoff = hasprepage ? (XrdSys::PageSize - p1_off) : 0;
570
571 // anything in next page?
572 if (blen <= npoff)
573 {
574 // only need to write the first, partial page
575 if (hasprepage)
576 {
577 const ssize_t wret = ts_->WriteTags(&prepageval, p1, 1);
578 if (wret<0)
579 {
580 TRACE(Warn, TagsWriteError(p1, 1, wret));
581 return wret;
582 }
583 }
584 return 0;
585 }
586
587 const uint8_t *const p = (uint8_t*)buff;
588 const uint32_t *csp = csvec;
589 if (csp && hasprepage) csp++;
590
591 // see if there will be no old data to account for in the last page
592 if (p2_off == 0 || (offset + blen >= static_cast<size_t>(trackinglen)))
593 {
594 // write any precomputed prepage, then write full pages and last partial page (computing or using supplied csvec)
595 const ssize_t aret = apply_sequential_aligned_modify(&p[npoff], np, blen-npoff, csp, hasprepage, false, prepageval, 0U);
596 if (aret<0)
597 {
598 TRACE(Warn, "Error updating tags, error=" << aret);
599 return aret;
600 }
601 return 0;
602 }
603
604 // last page contains existing data that has to be read to modify it
605
606 uint32_t lastpageval;
607 const int ret = StoreRangeUnaligned_postblock(fd, &p[npoff], blen-npoff, offset+npoff, trackinglen, csp, lastpageval);
608 if (ret<0)
609 {
610 return ret;
611 }
612
613 // write any precomputed prepage, then write full pages (computing or using supplied csvec) and finally write precomputed last page
614 const ssize_t aret = apply_sequential_aligned_modify(&p[npoff], np, blen-npoff, csp, hasprepage, true, prepageval, lastpageval);
615 if (aret<0)
616 {
617 TRACE(Warn, "Error updating tags, error=" << aret);
618 return aret;
619 }
620
621 return 0;
622}
623
624// VerifyRangeUnaligned
625//
626// Used by Read for various cases with mis-alignment. See FetchRangeUnaligned for list of conditions.
627//
628int XrdOssCsiPages::VerifyRangeUnaligned(XrdOssDF *const fd, const void *const buff, const off_t offset, const size_t blen, const Sizes_t &sizes)
629{
630 return FetchRangeUnaligned(fd, buff, offset, blen, sizes, NULL, XrdOssDF::Verify);
631}
632
633//
634// used by FetchRangeUnaligned when only part of the data in the first page is needed, or the page is short
635//
636// offset: offset in file for start of read
637// blen: total length of read
638//
639int XrdOssCsiPages::FetchRangeUnaligned_preblock(XrdOssDF *const fd, const void *const buff, const off_t offset, const size_t blen,
640 const off_t trackinglen, uint32_t *const tbuf, uint32_t *const csvec, const uint64_t opts)
641{
642 EPNAME("FetchRangeUnaligned_preblock");
643
644 const off_t p1 = offset / XrdSys::PageSize;
645 const size_t p1_off = offset % XrdSys::PageSize;
646
647 // bavail is length of data in this page
648 const size_t bavail = std::min(trackinglen - (XrdSys::PageSize*p1), (off_t)XrdSys::PageSize);
649
650 // bcommon is length of data in this page that user wants
651 const size_t bcommon = std::min(bavail - p1_off, blen);
652
653 uint8_t b[XrdSys::PageSize];
654 const uint8_t *ub = (uint8_t*)buff;
655 if (bavail>bcommon)
656 {
657 // will need more data to either verify or return crc of the user's data
658 // (in case of no verify and no csvec FetchRange() returns early)
659 const ssize_t rret = XrdOssCsiPages::fullread(fd, b, XrdSys::PageSize*p1, bavail);
660 if (rret<0)
661 {
662 TRACE(Warn, PageReadError(bavail, p1, rret));
663 return rret;
664 }
665 // if we're going to verify, make sure we just read the same overlapping data as that in the user's buffer
666 if ((opts & XrdOssDF::Verify))
667 {
668 if (memcmp(buff, &b[p1_off], bcommon))
669 {
670 size_t badoff;
671 for(badoff=0;badoff<bcommon;badoff++) { if (((uint8_t*)buff)[badoff] != b[p1_off+badoff]) break; }
672 badoff = (badoff < bcommon) ? badoff : 0; // may be possible with concurrent modification
673 TRACE(Warn, ByteMismatchError(bavail, XrdSys::PageSize*p1+p1_off+badoff, ((uint8_t*)buff)[badoff], b[p1_off+badoff]));
674 return -EDOM;
675 }
676 }
677 ub = b;
678 }
679 // verify; based on whole block, or user's buffer (if it contains the whole block)
680 if ((opts & XrdOssDF::Verify))
681 {
682 const uint32_t crc32calc = XrdOucCRC::Calc32C(ub, bavail, 0U);
683 if (tbuf[0] != crc32calc)
684 {
685 TRACE(Warn, CRCMismatchError(bavail, p1, crc32calc, tbuf[0]));
686 return -EDOM;
687 }
688 }
689
690 // if we're returning csvec values and this first block
691 // needs adjustment because user requested a subset..
692 if (bavail>bcommon && csvec)
693 {
694 // make sure csvec[0] corresponds to only the data the user wanted, not whole page.
695 // if we have already verified the page + common part matches user's, take checksum of common.
696 // (Use local copy of page, perhaps less chance of accidental concurrent modification than buffer)
697 // Otherwise base on saved checksum.
698 if ((opts & XrdOssDF::Verify))
699 {
700 csvec[0] = XrdOucCRC::Calc32C(&b[p1_off], bcommon, 0u);
701 }
702 else
703 {
704 // calculate expected user checksum based on block's recorded checksum, adjusting
705 // for data not included in user's request. If either the returned data or the
706 // data not included in the user's request are corrupt the returned checksum and
707 // returned data will (probably) mismatch.
708
709 // remove block data before p1_off from checksum
710 uint32_t crc32c = XrdOucCRC::Calc32C(b, p1_off, 0u);
711 csvec[0] = CrcUtils.crc32c_split2(csvec[0], crc32c, bavail-p1_off);
712
713 // remove block data after p1_off+bcommon upto bavail
714 crc32c = XrdOucCRC::Calc32C(&b[p1_off+bcommon], bavail-p1_off-bcommon, 0u);
715 csvec[0] = CrcUtils.crc32c_split1(csvec[0], crc32c, bavail-p1_off-bcommon);
716 }
717 }
718 return 0;
719}
720
721//
722// used by FetchRangeUnaligned when only part of a page of data is needed from the last page
723//
724// offset: offset in file for start of read
725// blen: total length of read
726//
727int XrdOssCsiPages::FetchRangeUnaligned_postblock(XrdOssDF *const fd, const void *const buff, const off_t offset, const size_t blen,
728 const off_t trackinglen, uint32_t *const tbuf, uint32_t *const csvec, const size_t tidx, const uint64_t opts)
729{
730 EPNAME("FetchRangeUnaligned_postblock");
731
732 const off_t p2 = (offset+blen) / XrdSys::PageSize;
733 const size_t p2_off = (offset+blen) % XrdSys::PageSize;
734
735 // length of data in last (p2) page
736 const size_t bavail = std::min(trackinglen - (XrdSys::PageSize*p2), (off_t)XrdSys::PageSize);
737
738 // how much of that data is not being returned
739 const size_t bremain = (p2_off < bavail) ? bavail-p2_off : 0;
740 uint8_t b[XrdSys::PageSize];
741 const uint8_t *ub = &((uint8_t*)buff)[blen-p2_off];
742 if (bremain>0)
743 {
744 const ssize_t rret = XrdOssCsiPages::fullread(fd, b, XrdSys::PageSize*p2, bavail);
745 if (rret<0)
746 {
747 TRACE(Warn, PageReadError(bavail, p2, rret));
748 return rret;
749 }
750 // if we're verifying make sure overlapping part of data just read matches user's buffer
751 if ((opts & XrdOssDF::Verify))
752 {
753 const uint8_t *const p = (uint8_t*)buff;
754 if (memcmp(&p[blen-p2_off], b, p2_off))
755 {
756 size_t badoff;
757 for(badoff=0;badoff<p2_off;badoff++) { if (p[blen-p2_off+badoff] != b[badoff]) break; }
758 badoff = (badoff < p2_off) ? badoff : 0; // may be possible with concurrent modification
759 TRACE(Warn, ByteMismatchError(bavail, XrdSys::PageSize*p2+badoff, p[blen-p2_off+badoff], b[badoff]));
760 return -EDOM;
761 }
762 }
763 ub = b;
764 }
765 if ((opts & XrdOssDF::Verify))
766 {
767 const uint32_t crc32calc = XrdOucCRC::Calc32C(ub, bavail, 0U);
768 if (tbuf[tidx] != crc32calc)
769 {
770 TRACE(Warn, CRCMismatchError(bavail, p2, crc32calc, tbuf[tidx]));
771 return -EDOM;
772 }
773 }
774 // if we're returning csvec and user only request part of page
775 // adjust the crc
776 if (csvec && bremain>0)
777 {
778 if ((opts & XrdOssDF::Verify))
779 {
780 // verified; calculate crc based on common part of page.
781 csvec[tidx] = XrdOucCRC::Calc32C(b, p2_off, 0u);
782 }
783 else
784 {
785 // recalculate crc based on recorded checksum and adjusting for part of data not returned.
786 // If either the returned data or the data not included in the user's request are
787 // corrupt the returned checksum and returned data will (probably) mismatch.
788
789 const uint32_t crc32c = XrdOucCRC::Calc32C(&b[p2_off], bremain, 0u);
790 csvec[tidx] = CrcUtils.crc32c_split1(csvec[tidx], crc32c, bremain);
791 }
792 }
793
794 return 0;
795}
796
797//
798// FetchRangeUnaligned
799//
800// Used by pgRead/Read when reading a range not starting at a page boundary within the file
801// OR when the length is not a multiple of the page-size and the read finishes not at the end of file.
802//
803int XrdOssCsiPages::FetchRangeUnaligned(XrdOssDF *const fd, const void *const buff, const off_t offset, const size_t blen, const Sizes_t &sizes, uint32_t *const csvec, const uint64_t opts)
804{
805 EPNAME("FetchRangeUnaligned");
806
807 const off_t p1 = offset / XrdSys::PageSize;
808 const size_t p1_off = offset % XrdSys::PageSize;
809 const off_t p2 = (offset+blen) / XrdSys::PageSize;
810 const size_t p2_off = (offset+blen) % XrdSys::PageSize;
811
812 const off_t trackinglen = sizes.first;
813
814 size_t ntagstoread = (p2_off>0) ? p2-p1+1 : p2-p1;
815 size_t ntagsbase = p1;
816 uint32_t tbufint[stsize_], *tbuf=0;
817 size_t tbufsz = 0;
818 if (!csvec)
819 {
820 tbuf = tbufint;
821 tbufsz = sizeof(tbufint)/sizeof(uint32_t);
822 }
823 else
824 {
825 tbuf = csvec;
826 tbufsz = ntagstoread;
827 }
828
829 size_t tcnt = std::min(ntagstoread, tbufsz);
830 ssize_t rret = ts_->ReadTags(tbuf, ntagsbase, tcnt);
831 if (rret<0)
832 {
833 TRACE(Warn, TagsReadError(ntagsbase, tcnt, rret) << " (first)");
834 return rret;
835 }
836 ntagstoread -= tcnt;
837
838 // deal with partial first page
839 if ( p1_off>0 || blen < static_cast<size_t>(XrdSys::PageSize) )
840 {
841 const int ret = FetchRangeUnaligned_preblock(fd, buff, offset, blen, trackinglen, tbuf, csvec, opts);
842 if (ret<0)
843 {
844 return ret;
845 }
846 }
847
848 // first (inclusive) and last (exclusive) full page
849 const off_t fp = (p1_off != 0) ? p1+1 : p1;
850 const off_t lp = p2;
851
852 // verify full pages if wanted
853 if (fp<lp && (opts & XrdOssDF::Verify))
854 {
855 const uint8_t *const p = (uint8_t*)buff;
856 uint32_t calcbuf[stsize_];
857 const size_t cbufsz = sizeof(calcbuf)/sizeof(uint32_t);
858 size_t toread = lp-fp;
859 size_t nread = 0;
860 while(toread>0)
861 {
862 const size_t ccnt = std::min(toread, cbufsz);
863 XrdOucCRC::Calc32C(&p[(p1_off ? XrdSys::PageSize-p1_off : 0)+XrdSys::PageSize*nread],ccnt*XrdSys::PageSize,calcbuf);
864 size_t tovalid = ccnt;
865 size_t nvalid = 0;
866 while(tovalid>0)
867 {
868 const size_t tidx=fp+nread+nvalid - ntagsbase;
869 const size_t nv = std::min(tovalid, tbufsz-tidx);
870 if (nv == 0)
871 {
872 assert(csvec == NULL);
873 ntagsbase += tbufsz;
874 tcnt = std::min(ntagstoread, tbufsz);
875 rret = ts_->ReadTags(tbuf, ntagsbase, tcnt);
876 if (rret<0)
877 {
878 TRACE(Warn, TagsReadError(ntagsbase, tcnt, rret) << " (mid)");
879 return rret;
880 }
881 ntagstoread -= tcnt;
882 continue;
883 }
884 if (memcmp(&calcbuf[nvalid], &tbuf[tidx], 4*nv))
885 {
886 size_t badpg;
887 for(badpg=0;badpg<nv;badpg++) { if (memcmp(&calcbuf[nvalid+badpg], &tbuf[tidx+badpg],4)) break; }
889 (ntagsbase+tidx+badpg),
890 calcbuf[nvalid+badpg], tbuf[tidx+badpg]));
891 return -EDOM;
892 }
893 tovalid -= nv;
894 nvalid += nv;
895 }
896 toread -= ccnt;
897 nread += ccnt;
898 }
899 }
900
901 // last partial page
902 if (p2>p1 && p2_off > 0)
903 {
904 // make sure we have last tag;
905 // (should already have all of them if we're returning them in csvec)
906 size_t tidx = p2 - ntagsbase;
907 if (tidx >= tbufsz)
908 {
909 assert(csvec == NULL);
910 tidx = 0;
911 ntagsbase = p2;
912 rret = ts_->ReadTags(tbuf, ntagsbase, 1);
913 if (rret<0)
914 {
915 TRACE(Warn, TagsReadError(ntagsbase, 1, rret) << " (last)");
916 return rret;
917 }
918 ntagstoread = 0;
919 }
920
921 const int ret = FetchRangeUnaligned_postblock(fd, buff, offset, blen, trackinglen, tbuf, csvec, tidx, opts);
922 if (ret<0)
923 {
924 return ret;
925 }
926 }
927
928 return 0;
929}
#define EPNAME(x)
XrdOucTrace OssCsiTrace
static XrdOssCsiCrcUtils CrcUtils
uint32_t crc32c(uint32_t crc, void const *buf, size_t len)
struct myOpts opts
#define TRACE(act, x)
Definition XrdTrace.hh:63
static uint32_t crc32c_extendwith_zero(uint32_t crc, size_t len)
static uint32_t crc32c_combine(uint32_t crc1, uint32_t crc2, size_t len2)
static uint32_t crc32c_split1(uint32_t crctot, uint32_t crc2, size_t len2)
static uint32_t crc32c_split2(uint32_t crctot, uint32_t crc1, size_t len2)
int StoreRangeUnaligned(XrdOssDF *, const void *, off_t, size_t, const Sizes_t &, const uint32_t *)
ssize_t apply_sequential_aligned_modify(const void *, off_t, size_t, const uint32_t *, bool, bool, uint32_t, uint32_t)
std::string ByteMismatchError(size_t blen, off_t off, uint8_t user, uint8_t page)
static ssize_t maxread(XrdOssDF *fd, void *buff, const off_t off, const size_t sz, size_t tg=0)
std::string TagsReadError(off_t start, size_t n, int ret)
std::unique_ptr< XrdOssCsiTagstore > ts_
int UpdateRangeUnaligned(XrdOssDF *, const void *, off_t, size_t, const Sizes_t &)
std::string TagsWriteError(off_t start, size_t n, int ret)
int FetchRangeUnaligned_preblock(XrdOssDF *, const void *, off_t, size_t, off_t, uint32_t *, uint32_t *, uint64_t)
int UpdateRangeHoleUntilPage(XrdOssDF *, off_t, const Sizes_t &)
static ssize_t fullread(XrdOssDF *fd, void *buff, const off_t off, const size_t sz)
std::pair< off_t, off_t > Sizes_t
int FetchRangeUnaligned(XrdOssDF *, const void *, off_t, size_t, const Sizes_t &, uint32_t *, uint64_t)
int FetchRangeUnaligned_postblock(XrdOssDF *, const void *, off_t, size_t, off_t, uint32_t *, uint32_t *, size_t, uint64_t)
int VerifyRangeUnaligned(XrdOssDF *, const void *, off_t, size_t, const Sizes_t &)
std::string CRCMismatchError(size_t blen, off_t pgnum, uint32_t got, uint32_t expected)
int StoreRangeUnaligned_preblock(XrdOssDF *, const void *, size_t, off_t, off_t, const uint32_t *, uint32_t &)
std::string PageReadError(size_t blen, off_t pgnum, int ret)
int StoreRangeUnaligned_postblock(XrdOssDF *, const void *, size_t, off_t, off_t, const uint32_t *, uint32_t &)
const std::string fn_
static const size_t stsize_
static const uint64_t Verify
all: Verify checksums
Definition XrdOss.hh:223
static uint32_t Calc32C(const void *data, size_t count, uint32_t prevcs=0)
Definition XrdOucCRC.cc:190
static const int PageSize