1818#include  " llvm/ADT/SmallVector.h" 
1919#include  " llvm/ADT/StringRef.h" 
2020#include  " llvm/Bitstream/BitCodes.h" 
21+ #include  " llvm/Support/Casting.h" 
2122#include  " llvm/Support/Endian.h" 
2223#include  " llvm/Support/MathExtras.h" 
2324#include  " llvm/Support/raw_ostream.h" 
2829namespace  llvm  {
2930
3031class  BitstreamWriter  {
31-   // / Out - The buffer that keeps unflushed bytes.
32-   SmallVectorImpl<char > &Out;
33- 
34-   // / FS - The file stream that Out flushes to. If FS is nullptr, it does not
35-   // / support read or seek, Out cannot be flushed until all data are written.
36-   raw_fd_stream *FS;
37- 
38-   // / FlushThreshold - If FS is valid, this is the threshold (unit B) to flush
39-   // / FS.
32+   // / Owned buffer, used to init Buffer if the provided stream doesn't happen to
33+   // / be a buffer itself.
34+   SmallVector<char , 0 > OwnBuffer;
35+   // / Internal buffer for unflushed bytes (unless there is no stream to flush
36+   // / to, case in which these are "the bytes"). The writer backpatches, so it is
37+   // / efficient to buffer.
38+   SmallVectorImpl<char > &Buffer;
39+ 
40+   // / FS - The file stream that Buffer flushes to. If FS is a raw_fd_stream, the
41+   // / writer will incrementally flush at subblock boundaries. Otherwise flushing
42+   // / will happen at the end of BitstreamWriter's lifetime.
43+   raw_ostream *const  FS;
44+ 
45+   // / FlushThreshold - this is the threshold (unit B) to flush to FS, if FS is a
46+   // / raw_fd_stream.
4047  const  uint64_t  FlushThreshold;
4148
4249  // / CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
43-   unsigned  CurBit;
50+   unsigned  CurBit =  0 ;
4451
4552  // / CurValue - The current value. Only bits < CurBit are valid.
46-   uint32_t  CurValue;
53+   uint32_t  CurValue =  0 ;
4754
4855  // / CurCodeSize - This is the declared size of code values used for the
4956  // / current block, in bits.
50-   unsigned  CurCodeSize;
57+   unsigned  CurCodeSize =  2 ;
5158
5259  // / BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently
5360  // / selected BLOCK ID.
54-   unsigned  BlockInfoCurBID;
61+   unsigned  BlockInfoCurBID =  0 ;
5562
5663  // / CurAbbrevs - Abbrevs installed at in this block.
5764  std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
5865
66+   //  Support for retrieving a section of the output, for purposes such as
67+   //  checksumming.
68+   std::optional<size_t > BlockFlushingStartPos;
69+ 
5970  struct  Block  {
6071    unsigned  PrevCodeSize;
6172    size_t  StartSizeWord;
@@ -77,47 +88,106 @@ class BitstreamWriter {
7788  void  WriteWord (unsigned  Value) {
7889    Value =
7990        support::endian::byte_swap<uint32_t , llvm::endianness::little>(Value);
80-     Out .append (reinterpret_cast <const  char  *>(&Value),
81-                reinterpret_cast <const  char  *>(&Value + 1 ));
91+     Buffer .append (reinterpret_cast <const  char  *>(&Value),
92+                    reinterpret_cast <const  char  *>(&Value + 1 ));
8293  }
8394
84-   uint64_t  GetNumOfFlushedBytes () const  { return  FS ? FS->tell () : 0 ; }
95+   uint64_t  GetNumOfFlushedBytes () const  {
96+     return  fdStream () ? fdStream ()->tell () : 0 ;
97+   }
8598
86-   size_t  GetBufferOffset () const  { return  Out.size () + GetNumOfFlushedBytes (); }
99+   size_t  GetBufferOffset () const  {
100+     return  Buffer.size () + GetNumOfFlushedBytes ();
101+   }
87102
88103  size_t  GetWordIndex () const  {
89104    size_t  Offset = GetBufferOffset ();
90105    assert ((Offset & 3 ) == 0  && " Not 32-bit aligned" 
91106    return  Offset / 4 ;
92107  }
93108
94-   // / If the related file stream supports reading, seeking and writing, flush
95-   // / the buffer if its size is above a threshold.
96-   void  FlushToFile () {
97-     if  (!FS)
109+   void  flushAndClear () {
110+     assert (FS);
111+     assert (!Buffer.empty ());
112+     assert (!BlockFlushingStartPos &&
113+            " a call to markAndBlockFlushing should have been paired with a " 
114+            " call to getMarkedBufferAndResumeFlushing" 
115+     FS->write (Buffer.data (), Buffer.size ());
116+     Buffer.clear ();
117+   }
118+ 
119+   // / If the related file stream is a raw_fd_stream, flush the buffer if its
120+   // / size is above a threshold. If \p OnClosing is true, flushing happens
121+   // / regardless of thresholds.
122+   void  FlushToFile (bool  OnClosing = false ) {
123+     if  (!FS || Buffer.empty ())
98124      return ;
99-     if  (Out.size () < FlushThreshold)
125+     if  (OnClosing)
126+       return  flushAndClear ();
127+     if  (BlockFlushingStartPos)
100128      return ;
101-     FS->write ((char  *)&Out.front (), Out.size ());
102-     Out.clear ();
129+     if  (fdStream () && Buffer.size () > FlushThreshold)
130+       flushAndClear ();
131+   }
132+ 
133+   raw_fd_stream *fdStream () { return  dyn_cast_or_null<raw_fd_stream>(FS); }
134+ 
135+   const  raw_fd_stream *fdStream () const  {
136+     return  dyn_cast_or_null<raw_fd_stream>(FS);
137+   }
138+ 
139+   SmallVectorImpl<char > &getInternalBufferFromStream (raw_ostream &OutStream) {
140+     if  (auto  *SV = dyn_cast<raw_svector_ostream>(&OutStream))
141+       return  SV->buffer ();
142+     return  OwnBuffer;
103143  }
104144
105145public: 
106-   // / Create a BitstreamWriter that writes to Buffer \p O.
146+   // / Create a BitstreamWriter over a raw_ostream \p OutStream.
147+   // / If \p OutStream is a raw_svector_ostream, the BitstreamWriter will write
148+   // / directly to the latter's buffer. In all other cases, the BitstreamWriter
149+   // / will use an internal buffer and flush at the end of its lifetime.
107150  // /
108-   // / \p FS is the file stream that \p O flushes to incrementally. If \p FS is
109-   // / null, \p O does not flush incrementially, but writes to disk at the end.
151+   // / In addition, if \p is a raw_fd_stream supporting seek, tell, and read
152+   // / (besides write), the BitstreamWriter will also flush incrementally, when a
153+   // / subblock is finished, and if the FlushThreshold is passed.
110154  // /
111-   // / \p FlushThreshold is the threshold (unit M) to flush \p O if \p FS is
112-   // / valid. Flushing only occurs at (sub)block boundaries.
113-   BitstreamWriter (SmallVectorImpl<char > &O, raw_fd_stream *FS = nullptr ,
114-                   uint32_t  FlushThreshold = 512 )
115-       : Out(O), FS(FS), FlushThreshold(uint64_t (FlushThreshold) << 20 ), CurBit(0 ),
116-         CurValue (0 ), CurCodeSize(2 ) {}
155+   // / NOTE: \p FlushThreshold's unit is MB.
156+   BitstreamWriter (raw_ostream &OutStream, uint32_t  FlushThreshold = 512 )
157+       : Buffer(getInternalBufferFromStream(OutStream)),
158+         FS (!isa<raw_svector_ostream>(OutStream) ? &OutStream : nullptr),
159+         FlushThreshold(uint64_t (FlushThreshold) << 20) {}
160+ 
161+   // / Convenience constructor for users that start with a vector - avoids
162+   // / needing to wrap it in a raw_svector_ostream.
163+   BitstreamWriter (SmallVectorImpl<char > &Buff)
164+       : Buffer(Buff), FS(nullptr ), FlushThreshold(0 ) {}
117165
118166  ~BitstreamWriter () {
119-     assert (CurBit ==  0  &&  " Unflushed data remaining " 
167+     FlushToWord ( );
120168    assert (BlockScope.empty () && CurAbbrevs.empty () && " Block imbalance" 
169+     FlushToFile (/* OnClosing=*/ true );
170+   }
171+ 
172+   // / For scenarios where the user wants to access a section of the stream to
173+   // / (for example) compute some checksum, disable flushing and remember the
174+   // / position in the internal buffer where that happened. Must be paired with a
175+   // / call to getMarkedBufferAndResumeFlushing.
176+   void  markAndBlockFlushing () {
177+     assert (!BlockFlushingStartPos);
178+     BlockFlushingStartPos = Buffer.size ();
179+   }
180+ 
181+   // / resumes flushing, but does not flush, and returns the section in the
182+   // / internal buffer starting from the position marked with
183+   // / markAndBlockFlushing. The return should be processed before any additional
184+   // / calls to this object, because those may cause a flush and invalidate the
185+   // / return.
186+   StringRef getMarkedBufferAndResumeFlushing () {
187+     assert (BlockFlushingStartPos);
188+     size_t  Start = *BlockFlushingStartPos;
189+     BlockFlushingStartPos.reset ();
190+     return  {&Buffer[Start], Buffer.size () - Start};
121191  }
122192
123193  // / Retrieve the current position in the stream, in bits.
@@ -141,16 +211,19 @@ class BitstreamWriter {
141211    if  (ByteNo >= NumOfFlushedBytes) {
142212      assert ((!endian::readAtBitAlignment<uint8_t , llvm::endianness::little,
143213                                          unaligned>(
144-                  &Out [ByteNo - NumOfFlushedBytes], StartBit)) &&
214+                  &Buffer [ByteNo - NumOfFlushedBytes], StartBit)) &&
145215             " Expected to be patching over 0-value placeholders" 
146216      endian::writeAtBitAlignment<uint8_t , llvm::endianness::little, unaligned>(
147-           &Out [ByteNo - NumOfFlushedBytes], NewByte, StartBit);
217+           &Buffer [ByteNo - NumOfFlushedBytes], NewByte, StartBit);
148218      return ;
149219    }
150220
221+     //  If we don't have a raw_fd_stream, GetNumOfFlushedBytes() should have
222+     //  returned 0, and we shouldn't be here.
223+     assert (fdStream () != nullptr );
151224    //  If the byte offset to backpatch is flushed, use seek to backfill data.
152225    //  First, save the file position to restore later.
153-     uint64_t  CurPos = FS ->tell ();
226+     uint64_t  CurPos = fdStream () ->tell ();
154227
155228    //  Copy data to update into Bytes from the file FS and the buffer Out.
156229    char  Bytes[3 ]; //  Use one more byte to silence a warning from Visual C++.
@@ -159,19 +232,19 @@ class BitstreamWriter {
159232    size_t  BytesFromBuffer = BytesNum - BytesFromDisk;
160233
161234    //  When unaligned, copy existing data into Bytes from the file FS and the
162-     //  buffer Out  so that it can be updated before writing. For debug builds
235+     //  buffer Buffer  so that it can be updated before writing. For debug builds
163236    //  read bytes unconditionally in order to check that the existing value is 0
164237    //  as expected.
165238#ifdef  NDEBUG
166239    if  (StartBit)
167240#endif 
168241    {
169-       FS ->seek (ByteNo);
170-       ssize_t  BytesRead = FS ->read (Bytes, BytesFromDisk);
242+       fdStream () ->seek (ByteNo);
243+       ssize_t  BytesRead = fdStream () ->read (Bytes, BytesFromDisk);
171244      (void )BytesRead; //  silence warning
172245      assert (BytesRead >= 0  && static_cast <size_t >(BytesRead) == BytesFromDisk);
173246      for  (size_t  i = 0 ; i < BytesFromBuffer; ++i)
174-         Bytes[BytesFromDisk + i] = Out [i];
247+         Bytes[BytesFromDisk + i] = Buffer [i];
175248      assert ((!endian::readAtBitAlignment<uint8_t , llvm::endianness::little,
176249                                          unaligned>(Bytes, StartBit)) &&
177250             " Expected to be patching over 0-value placeholders" 
@@ -182,13 +255,13 @@ class BitstreamWriter {
182255        Bytes, NewByte, StartBit);
183256
184257    //  Copy updated data back to the file FS and the buffer Out.
185-     FS ->seek (ByteNo);
186-     FS ->write (Bytes, BytesFromDisk);
258+     fdStream () ->seek (ByteNo);
259+     fdStream () ->write (Bytes, BytesFromDisk);
187260    for  (size_t  i = 0 ; i < BytesFromBuffer; ++i)
188-       Out [i] = Bytes[BytesFromDisk + i];
261+       Buffer [i] = Bytes[BytesFromDisk + i];
189262
190263    //  Restore the file position.
191-     FS ->seek (CurPos);
264+     fdStream () ->seek (CurPos);
192265  }
193266
194267  void  BackpatchHalfWord (uint64_t  BitNo, uint16_t  Val) {
@@ -481,11 +554,11 @@ class BitstreamWriter {
481554
482555    //  Emit literal bytes.
483556    assert (llvm::all_of (Bytes, [](UIntTy B) { return  isUInt<8 >(B); }));
484-     Out .append (Bytes.begin (), Bytes.end ());
557+     Buffer .append (Bytes.begin (), Bytes.end ());
485558
486559    //  Align end to 32-bits.
487560    while  (GetBufferOffset () & 3 )
488-       Out .push_back (0 );
561+       Buffer .push_back (0 );
489562  }
490563  void  emitBlob (StringRef Bytes, bool  ShouldEmitSize = true ) {
491564    emitBlob (ArrayRef ((const  uint8_t  *)Bytes.data (), Bytes.size ()),
0 commit comments