OLD | NEW |
| (Empty) |
1 /* | |
2 bsdiff.c -- Binary patch generator. | |
3 | |
4 Copyright 2003 Colin Percival | |
5 | |
6 For the terms under which this work may be distributed, please see | |
7 the adjoining file "LICENSE". | |
8 | |
9 ChangeLog: | |
10 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit | |
11 values throughout. | |
12 --Benjamin Smedberg <benjamin@smedbergs.us> | |
13 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table | |
14 provided by libbz2. | |
15 --Darin Fisher <darin@meer.net> | |
16 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library | |
17 --Rahul Kuchhal | |
18 */ | |
19 | |
20 #include "mbspatch.h" | |
21 | |
22 #include <stdlib.h> | |
23 #include <stdio.h> | |
24 #include <string.h> | |
25 #include <fcntl.h> | |
26 #include <stdarg.h> | |
27 #ifdef _WIN32 | |
28 #include <io.h> | |
29 #include <winsock2.h> | |
30 #else | |
31 #include <unistd.h> | |
32 #include <arpa/inet.h> | |
33 #define _O_BINARY 0 | |
34 #endif | |
35 | |
36 #undef MIN | |
37 #define MIN(x,y) (((x)<(y)) ? (x) : (y)) | |
38 | |
39 static void | |
40 reporterr(int e, const char *fmt, ...) | |
41 { | |
42 if (fmt) { | |
43 va_list args; | |
44 va_start(args, fmt); | |
45 vfprintf(stderr, fmt, args); | |
46 va_end(args); | |
47 } | |
48 | |
49 exit(e); | |
50 } | |
51 | |
52 static void | |
53 split(int *I,int *V,int start,int len,int h) | |
54 { | |
55 int i,j,k,x,tmp,jj,kk; | |
56 | |
57 if(len<16) { | |
58 for(k=start;k<start+len;k+=j) { | |
59 j=1;x=V[I[k]+h]; | |
60 for(i=1;k+i<start+len;i++) { | |
61 if(V[I[k+i]+h]<x) { | |
62 x=V[I[k+i]+h]; | |
63 j=0; | |
64 }; | |
65 if(V[I[k+i]+h]==x) { | |
66 tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp; | |
67 j++; | |
68 }; | |
69 }; | |
70 for(i=0;i<j;i++) V[I[k+i]]=k+j-1; | |
71 if(j==1) I[k]=-1; | |
72 }; | |
73 return; | |
74 }; | |
75 | |
76 x=V[I[start+len/2]+h]; | |
77 jj=0;kk=0; | |
78 for(i=start;i<start+len;i++) { | |
79 if(V[I[i]+h]<x) jj++; | |
80 if(V[I[i]+h]==x) kk++; | |
81 }; | |
82 jj+=start;kk+=jj; | |
83 | |
84 i=start;j=0;k=0; | |
85 while(i<jj) { | |
86 if(V[I[i]+h]<x) { | |
87 i++; | |
88 } else if(V[I[i]+h]==x) { | |
89 tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp; | |
90 j++; | |
91 } else { | |
92 tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp; | |
93 k++; | |
94 }; | |
95 }; | |
96 | |
97 while(jj+j<kk) { | |
98 if(V[I[jj+j]+h]==x) { | |
99 j++; | |
100 } else { | |
101 tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp; | |
102 k++; | |
103 }; | |
104 }; | |
105 | |
106 if(jj>start) split(I,V,start,jj-start,h); | |
107 | |
108 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1; | |
109 if(jj==kk-1) I[jj]=-1; | |
110 | |
111 if(start+len>kk) split(I,V,kk,start+len-kk,h); | |
112 } | |
113 | |
114 static void | |
115 qsufsort(int *I,int *V,unsigned char *old,int oldsize) | |
116 { | |
117 int buckets[256]; | |
118 int i,h,len; | |
119 | |
120 for(i=0;i<256;i++) buckets[i]=0; | |
121 for(i=0;i<oldsize;i++) buckets[old[i]]++; | |
122 for(i=1;i<256;i++) buckets[i]+=buckets[i-1]; | |
123 for(i=255;i>0;i--) buckets[i]=buckets[i-1]; | |
124 buckets[0]=0; | |
125 | |
126 for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i; | |
127 I[0]=oldsize; | |
128 for(i=0;i<oldsize;i++) V[i]=buckets[old[i]]; | |
129 V[oldsize]=0; | |
130 for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1; | |
131 I[0]=-1; | |
132 | |
133 for(h=1;I[0]!=-(oldsize+1);h+=h) { | |
134 len=0; | |
135 for(i=0;i<oldsize+1;) { | |
136 if(I[i]<0) { | |
137 len-=I[i]; | |
138 i-=I[i]; | |
139 } else { | |
140 if(len) I[i-len]=-len; | |
141 len=V[I[i]]+1-i; | |
142 split(I,V,i,len,h); | |
143 i+=len; | |
144 len=0; | |
145 }; | |
146 }; | |
147 if(len) I[i-len]=-len; | |
148 }; | |
149 | |
150 for(i=0;i<oldsize+1;i++) I[V[i]]=i; | |
151 } | |
152 | |
153 static int | |
154 matchlen(unsigned char *old,int oldsize,unsigned char *newbuf,int newsize) | |
155 { | |
156 int i; | |
157 | |
158 for(i=0;(i<oldsize)&&(i<newsize);i++) | |
159 if(old[i]!=newbuf[i]) break; | |
160 | |
161 return i; | |
162 } | |
163 | |
164 static int | |
165 search(int *I,unsigned char *old,int oldsize, | |
166 unsigned char *newbuf,int newsize,int st,int en,int *pos) | |
167 { | |
168 int x,y; | |
169 | |
170 if(en-st<2) { | |
171 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize); | |
172 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize); | |
173 | |
174 if(x>y) { | |
175 *pos=I[st]; | |
176 return x; | |
177 } else { | |
178 *pos=I[en]; | |
179 return y; | |
180 } | |
181 }; | |
182 | |
183 x=st+(en-st)/2; | |
184 if(memcmp(old+I[x],newbuf,MIN(oldsize-I[x],newsize))<0) { | |
185 return search(I,old,oldsize,newbuf,newsize,x,en,pos); | |
186 } else { | |
187 return search(I,old,oldsize,newbuf,newsize,st,x,pos); | |
188 }; | |
189 } | |
190 | |
191 int main(int argc,char *argv[]) | |
192 { | |
193 int fd; | |
194 unsigned char *old,*newbuf; | |
195 int oldsize,newsize; | |
196 int *I,*V; | |
197 | |
198 int scan,pos,len; | |
199 int lastscan,lastpos,lastoffset; | |
200 int oldscore,scsc; | |
201 | |
202 int s,Sf,lenf,Sb,lenb; | |
203 int overlap,Ss,lens; | |
204 int i; | |
205 | |
206 int dblen,eblen; | |
207 unsigned char *db,*eb; | |
208 | |
209 unsigned int scrc; | |
210 | |
211 MBSPatchHeader header = { | |
212 {'M','B','D','I','F','F','1','0'}, | |
213 0, 0, 0, 0, 0, 0 | |
214 }; | |
215 | |
216 int numtriples; | |
217 | |
218 if(argc!=4) | |
219 reporterr(1,"usage: %s <oldfile> <newfile> <patchfile>\n",argv[0
]); | |
220 | |
221 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure | |
222 that we never try to malloc(0) and get a NULL pointer */ | |
223 if(((fd=open(argv[1],O_RDONLY|_O_BINARY,0))<0) || | |
224 ((oldsize=lseek(fd,0,SEEK_END))==-1) || | |
225 ((old=(unsigned char*) malloc(oldsize+1))==NULL) || | |
226 (lseek(fd,0,SEEK_SET)!=0) || | |
227 (read(fd,old,oldsize)!=oldsize) || | |
228 (close(fd)==-1)) | |
229 reporterr(1,"%s\n",argv[1]); | |
230 | |
231 scrc = CalculateCrc(old, oldsize); | |
232 | |
233 if(((I=(int*) malloc((oldsize+1)*sizeof(int)))==NULL) || | |
234 ((V=(int*) malloc((oldsize+1)*sizeof(int)))==NULL)) | |
235 reporterr(1,NULL); | |
236 | |
237 qsufsort(I,V,old,oldsize); | |
238 | |
239 free(V); | |
240 | |
241 /* Allocate newsize+1 bytes instead of newsize bytes to ensure | |
242 that we never try to malloc(0) and get a NULL pointer */ | |
243 if(((fd=open(argv[2],O_RDONLY|_O_BINARY,0))<0) || | |
244 ((newsize=lseek(fd,0,SEEK_END))==-1) || | |
245 ((newbuf=(unsigned char*) malloc(newsize+1))==NULL) || | |
246 (lseek(fd,0,SEEK_SET)!=0) || | |
247 (read(fd,newbuf,newsize)!=newsize) || | |
248 (close(fd)==-1)) reporterr(1,"%s\n",argv[2]); | |
249 | |
250 if(((db=(unsigned char*) malloc(newsize+1))==NULL) || | |
251 ((eb=(unsigned char*) malloc(newsize+1))==NULL)) | |
252 reporterr(1,NULL); | |
253 | |
254 dblen=0; | |
255 eblen=0; | |
256 | |
257 if((fd=open(argv[3],O_CREAT|O_TRUNC|O_WRONLY|_O_BINARY,0666))<0) | |
258 reporterr(1,"%s\n",argv[3]); | |
259 | |
260 /* start writing here */ | |
261 | |
262 /* We don't know the lengths yet, so we will write the header again | |
263 at the end */ | |
264 | |
265 if(write(fd,&header,sizeof(MBSPatchHeader))!=sizeof(MBSPatchHeader)) | |
266 reporterr(1,"%s\n",argv[3]); | |
267 | |
268 scan=0;len=0; | |
269 lastscan=0;lastpos=0;lastoffset=0; | |
270 numtriples = 0; | |
271 while(scan<newsize) { | |
272 oldscore=0; | |
273 | |
274 for(scsc=scan+=len;scan<newsize;scan++) { | |
275 len=search(I,old,oldsize,newbuf+scan,newsize-scan, | |
276 0,oldsize,&pos); | |
277 | |
278 for(;scsc<scan+len;scsc++) | |
279 if((scsc+lastoffset<oldsize) && | |
280 (old[scsc+lastoffset] == newbuf[scsc])) | |
281 oldscore++; | |
282 | |
283 if(((len==oldscore) && (len!=0)) || | |
284 (len>oldscore+8)) break; | |
285 | |
286 if((scan+lastoffset<oldsize) && | |
287 (old[scan+lastoffset] == newbuf[scan])) | |
288 oldscore--; | |
289 }; | |
290 | |
291 if((len!=oldscore) || (scan==newsize)) { | |
292 MBSPatchTriple triple; | |
293 | |
294 s=0;Sf=0;lenf=0; | |
295 for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) { | |
296 if(old[lastpos+i]==newbuf[lastscan+i]) s++; | |
297 i++; | |
298 if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; }; | |
299 }; | |
300 | |
301 lenb=0; | |
302 if(scan<newsize) { | |
303 s=0;Sb=0; | |
304 for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) { | |
305 if(old[pos-i]==newbuf[scan-i]) s++; | |
306 if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; }; | |
307 }; | |
308 }; | |
309 | |
310 if(lastscan+lenf>scan-lenb) { | |
311 overlap=(lastscan+lenf)-(scan-lenb); | |
312 s=0;Ss=0;lens=0; | |
313 for(i=0;i<overlap;i++) { | |
314 if(newbuf[lastscan+lenf-overlap+i]== | |
315 old[lastpos+lenf-overlap+i]) s++; | |
316 if(newbuf[scan-lenb+i]== | |
317 old[pos-lenb+i]) s--; | |
318 if(s>Ss) { Ss=s; lens=i+1; }; | |
319 }; | |
320 | |
321 lenf+=lens-overlap; | |
322 lenb-=lens; | |
323 }; | |
324 | |
325 for(i=0;i<lenf;i++) | |
326 db[dblen+i]=newbuf[lastscan+i]-old[lastpos+i]; | |
327 for(i=0;i<(scan-lenb)-(lastscan+lenf);i++) | |
328 eb[eblen+i]=newbuf[lastscan+lenf+i]; | |
329 | |
330 dblen+=lenf; | |
331 eblen+=(scan-lenb)-(lastscan+lenf); | |
332 | |
333 triple.x = htonl(lenf); | |
334 triple.y = htonl((scan-lenb)-(lastscan+lenf)); | |
335 triple.z = htonl((pos-lenb)-(lastpos+lenf)); | |
336 if (write(fd,&triple,sizeof(triple)) != sizeof(triple)) | |
337 reporterr(1,NULL); | |
338 | |
339 #ifdef DEBUG_bsmedberg | |
340 printf("Writing a block:\n" | |
341 " X: %u\n" | |
342 " Y: %u\n" | |
343 " Z: %i\n", | |
344 (int) lenf, | |
345 (int) ((scan-lenb)-(lastscan+lenf)), | |
346 (int) ((pos-lenb)-(lastpos+lenf))); | |
347 #endif | |
348 | |
349 ++numtriples; | |
350 | |
351 lastscan=scan-lenb; | |
352 lastpos=pos-lenb; | |
353 lastoffset=pos-scan; | |
354 }; | |
355 }; | |
356 | |
357 if(write(fd,db,dblen)!=dblen) | |
358 reporterr(1,NULL); | |
359 | |
360 if(write(fd,eb,eblen)!=eblen) | |
361 reporterr(1,NULL); | |
362 | |
363 header.slen = htonl(oldsize); | |
364 header.scrc32 = htonl(scrc); | |
365 header.dlen = htonl(newsize); | |
366 header.cblen = htonl(numtriples * sizeof(MBSPatchTriple)); | |
367 header.difflen = htonl(dblen); | |
368 header.extralen = htonl(eblen); | |
369 | |
370 if (lseek(fd,0,SEEK_SET) == -1 || | |
371 write(fd,&header,sizeof(header)) != sizeof(header) || | |
372 close(fd) == -1) | |
373 reporterr(1,NULL); | |
374 | |
375 free(db); | |
376 free(eb); | |
377 free(I); | |
378 free(old); | |
379 free(newbuf); | |
380 | |
381 return 0; | |
382 } | |
OLD | NEW |